示例#1
0
 def __init__(self, db, connections, parent=None):
     """Creates a new instance of ReportGenerator.
     @param db DatabaseBackend to be used.
     @param connections Connections for which the report should be generated.
     @param parent Qt Parent
     """
     super(ReportGenerator, self).__init__(parent)
     self._db = db
     self._connections = connections
     # One entry per diagram: [identifier, title, description text].
     self._diagramnames = [
         ["TS_TOTAL",
             self.tr("Total amount of logged trains per time-slice"),
             self.tr("The following diagram shows the amount of all "
                     "trains that were logged per time-slice.")],
         ["TS_PUNC",
             self.tr("Punctuality per time-slice"),
             self.tr("The following diagram shows the percentage of "
                     "punctuality per time-slice of all trains logged.")],
         ["TS_TOTAL_WD",
             self.tr("Total amount of logged trains on work days only"),
             self.tr("The next diagram shows the amount of all "
                     "logged trains per time-slice for workdays "
                     "only.")],
         ["TS_PUNC_WD",
             self.tr("Punctuality per time-slice on work days only"),
             self.tr("This diagram shows the percentage of punctuality "
                     "of all logged trains per time-slice for workdays "
                     "only.")],
         ["WD_TOTAL",
             self.tr("Total amount of logged trains per weekday"),
             self.tr("The next diagram shows the amount of all logged "
                     "trains sorted by week day.")],
         ["WD_PUNC",
             # This diagram is grouped by week day, not by time slice.
             self.tr("Punctuality per weekday"),
             self.tr("The following diagram shows the percentage of "
                     "punctuality for all trains logged sorted by "
                     "week day.")]
     ]
     self._dialogInterpretations = DialogAddInterpretation()
示例#2
0
class ReportGenerator(QObject):
    def __init__(self, db, connections, parent=None):
        """Creates a new instance of ReportGenerator.
        @param db DatabaseBackend to be used.
        @param connections Connections for which the report should be generated.
        @param parent Qt Parent
        """
        super(ReportGenerator, self).__init__(parent)
        self._db = db
        self._connections = connections
        # One entry per diagram: [file name prefix, title, description].
        # The order matters because the entries are addressed by index when
        # the diagrams are created, stored and embedded into the report.
        self._diagramnames = [
            ["TS_TOTAL",
                self.tr("Total amount of logged trains per time-slice"),
                self.tr("The following diagram shows the amount of all "
                        "trains that were logged per time-slice.")],
            ["TS_PUNC",
                self.tr("Punctuality per time-slice"),
                self.tr("The following diagram shows the percentage of "
                        "punctuality per time-slice of all trains logged.")],
            ["TS_TOTAL_WD",
                self.tr("Total amount of logged trains on work days only"),
                self.tr("The next diagram shows the amount of all "
                        "logged trains per time-slice for workdays "
                        "only.")],
            ["TS_PUNC_WD",
                self.tr("Punctuality per time-slice on work days only"),
                self.tr("This diagram shows the percentage of punctuality "
                        "of all logged trains per time-slice for workdays "
                        "only.")],
            ["WD_TOTAL",
                self.tr("Total amount of logged trains per weekday"),
                self.tr("The next diagram shows the amount of all logged "
                        "trains sorted by week day.")],
            ["WD_PUNC",
                # This diagram is grouped by week day, not by time slice.
                self.tr("Punctuality per weekday"),
                self.tr("The following diagram shows the percentage of "
                        "punctuality for all trains logged sorted by "
                        "week day.")]
        ]
        self._dialogInterpretations = DialogAddInterpretation()

    def doit(self):
        """Creates the report.
        Renders the diagrams for every connection while showing a modal
        progress dialog. Unless the user cancelled, the interpretation
        dialog is shown afterwards and the report is generated.
        """
        progress = QProgressDialog(
                self.tr("Generating Diagrams..."), self.tr("Cancel"),
                0, len(self._connections) * len(self._diagramnames))
        progress.setWindowModality(Qt.WindowModal)
        progress.show()
        aborted = False
        for connection in self._connections:
            if self._createDiagrams(connection, progress):
                # Stop immediately: rendering further diagrams after a
                # cancel is wasted work and must not reset the abort flag.
                aborted = True
                break
        progress.hide()
        if not aborted:
            self._collectInterpretations()
            self._generateReport()

    def _collectInterpretations(self):
        """Registers every generated diagram with the interpretation dialog
        and runs the dialog so the user can enter comments per diagram.
        Afterwards self._printInterpretations tells whether the dialog was
        accepted, i.e. whether the comments should go into the TeX'ed report.
        """
        dialog = self._dialogInterpretations
        for connection in self._connections:
            for idx, entry in enumerate(self._diagramnames):
                diagramName = self._getDiagramName(idx, connection)
                dialog.addDiagram(diagramName, entry[1], connection)
        result = dialog.exec_()
        self._printInterpretations = (
                result == DialogAddInterpretation.Accepted)

    def _createDiagrams(self, connection, d):
        """Creates all diagrams that will be embedded in the report.
        The diagrams will be stored to disk twice. Once as an eps file which
        will be used for embedding in the tex'ed report, and once as png file
        which will be used for displaying the diagram when the user is offered
        to place comments underneath a diagram.
        @param connection Connection for which the diagrams are created.
        @param d Progress dialog that is advanced after every diagram.
        @return True if the user cancelled the progress dialog, else False.
        """
        self._trainsTotal = self._getTotalTrainsLogged(connection)
        self._trainsDelayed = self._getTotalTrainsDelayed(connection)

        QApplication.processEvents()

        amountLabel = self.tr("Amount of logged trains")
        punctualityLabel = self.tr("Punctuality in %")

        # One row per diagram, in the same order as self._diagramnames so
        # that the row index selects the matching title and file name:
        # (generator, train filter, bar value function, y-axis label)
        steps = [
            (self.generateTimeSliceDiagram, None,
                self.computeTotalTrains, amountLabel),
            (self.generateTimeSliceDiagram, None,
                self.computePunctuality, punctualityLabel),
            (self.generateTimeSliceDiagram, self.isDepartureAtWorkDay,
                self.computeTotalTrains, amountLabel),
            (self.generateTimeSliceDiagram, self.isDepartureAtWorkDay,
                self.computePunctuality, punctualityLabel),
            (self.generateWeekDayDiagram, None,
                self.computeTotalTrains, amountLabel),
            (self.generateWeekDayDiagram, None,
                self.computePunctuality, punctualityLabel),
        ]

        for num, step in enumerate(steps):
            generate, trainFilterFunc, diagramFunc, ylabel = step
            generate(
                    connection,
                    trainFilterFunc,
                    diagramFunc,
                    ylabel,
                    self._diagramnames[num][1])
            self._storeDiagram(num, connection)
            # The figure is on disk now; close it so pyplot does not keep
            # every rendered figure alive until the application exits.
            plt.close()
            if self._increaseProgressCounter(d):
                return True

        return False

    def _increaseProgressCounter(self, d):
        """Advances the progress dialog by one step.
        @param d Progress dialog to advance.
        @return True if the dialog was cancelled (the value is left
        untouched in that case), otherwise False.
        """
        if not d.wasCanceled():
            # give the GUI a chance to repaint before bumping the value
            QApplication.processEvents()
            d.setValue(d.value() + 1)
            return False
        return True

    def _storeDiagram(self, idx, connection):
        """Writes the current pyplot figure to disk as eps and as png.
        Both files share the name returned by _getDiagramName and differ
        only in their extension.
        @param idx Index of the diagram name in self._diagramnames.
        @param connection Connection for which the diagram will be created.
        """
        basename = self._getDiagramName(idx, connection)
        for extension in ("eps", "png"):
            plt.savefig(basename + "." + extension, format=extension, dpi=300)

    def generateWeekDayDiagram(self,
            connection,
            trainFilterFunc,
            diagramFunc,
            ylabel,
            title):
        """Generates a matplotlib.pyplot bar diagram grouped by week day.
        The x-axis of the diagram is divided into the seven week days.
        For every week day the trains of that day are retrieved, optionally
        filtered, and counted per product (total and delayed). Both sums are
        passed to the supplied diagramFunc, which yields the height of the
        bar drawn for that product on that week day.
        Every product keeps the colour it was given when it first appeared;
        colours are handed out in the order of barColors and wrap around if
        there are more products than colours.
        @param connection The connection for which the diagram should be
        generated.
        @param trainFilterFunc A filter that will be applied to the list of
        total and delayed trains for each week day. May be None.
        @param diagramFunc A function that takes the sum of total and delayed
        trains of a product on the current week day as parameters. Is used to
        compute the value for a bar.
        @param ylabel The label of the y-axis in the diagram.
        @param title The title of the diagram.
        @return The subplot the bars were drawn into.
        """
        fig = plt.figure()
        plot = fig.add_subplot(111)
        width = 0.5
        offset = 1.0
        barColors = ['r', 'g', 'b', 'y', 'black']
        productColors = {}
        weekDays = [self.tr("Mo"), self.tr("Tu"), self.tr("We"), self.tr("Th"),
                    self.tr("Fr"), self.tr("Sa"), self.tr("Su")]
        for numWeekDay in range(len(weekDays)):
            ttotal, tdelayed = self._trainsForWeekDay(connection, numWeekDay)
            # filter trains if necessary.
            if trainFilterFunc is not None:
                ttotal = [t for t in ttotal if trainFilterFunc(t)]
                tdelayed = [t for t in tdelayed if trainFilterFunc(t)]
            sumPerProductTotal, products = self._sumProducts(ttotal)
            sumPerProductDelayed, dummy = self._sumProducts(tdelayed)

            # every week day occupies two units on the x-axis
            spacing = 2 * numWeekDay
            for slot, p in enumerate(products):
                st = sumPerProductTotal[p]
                sd = sumPerProductDelayed.get(p, 0)

                # call diagramFunc to retrieve value for this bar
                value = diagramFunc(st, sd)
                if value <= 0:
                    value = 0.001 # clamp to an epsilon so that matplotlib draws
                                  # the bar

                # Assign the next unused colour the first time a product
                # shows up. Using the number of products seen so far (and
                # not the slot within this week day) avoids two products
                # sharing a colour; the modulo avoids an IndexError.
                firstSeen = p not in productColors
                if firstSeen:
                    productColors[p] = barColors[
                            len(productColors) % len(barColors)]

                x = offset + spacing + slot * width
                bar = plot.bar(x, value, width, color=productColors[p])

                # set label for the first time a certain product is processed
                if firstSeen:
                    bar[0].set_label(p)

        plot.legend(loc='upper left')
        plot.set_ylabel(ylabel)
        plot.set_xlabel(self.tr("Week Day"))
        plot.set_title(title)
        plot.set_xticks(np.arange(0, len(weekDays) * 2, 2) + offset)
        plot.set_xticklabels(weekDays)

        return plot

    def isDepartureAtWorkDay(self, train):
        """Tells whether a train departs on a work day.
        @param train Train whose dateAsTime() is inspected.
        @return True if tm_wday of the departure is below 5, False otherwise
        or if the train has no departure time.
        """
        departure = train.dateAsTime()
        if departure is None:
            return False
        return departure.tm_wday < 5

    def generateTimeSliceDiagram(self,
            connection,
            trainFilterFunc,
            diagramFunc,
            ylabel,
            title):
        """Generates a matplotlib.pyplot diagram.
        The x-axis of the diagram is divided into two hour time slices.
        The algorithm computes a list with all trains within the currently
        processed time slice as well as the sum of all trains logged and the sum
        of all delayed trains per product.
        These will be passed to the supplied diagramFunc, which will then
        retrieve the value for the bar(s) that is/are displayed for the current
        time slice.
        Trains can also be filtered before evaluation by supplying a
        trainFilterFunc.
        Every product keeps the colour it was given when it first appeared;
        colours are handed out in the order of barColors and wrap around if
        there are more products than colours.
        @param connection The connection for which the diagram should be
        generated.
        @param trainFilterFunc A filter that will be applied to the list of
        total and delayed trains for each time slice. May be None.
        @param diagramFunc A function that takes the sum of total and delayed
        trains in the current timeslice as parameters. Is used to compute the
        value for a bar in the current time slice.
        @param ylabel The label of the y-axis in the diagram.
        @param title The title of the diagram.
        @return The subplot the bars were drawn into.
        """
        fig = plt.figure(figsize=(8, 6), dpi=400)
        plot = fig.add_subplot(111)
        width = 0.5
        offset = 1.0
        barColors = ['r', 'g', 'b', 'y', 'black']
        productColors = {}
        timeRange = range(0, 24, 2)
        for timeSlice in timeRange:
            # a slice covers hh:00 up to and including (hh+1):59
            tfrom = strptime("%d:00" % timeSlice, "%H:%M")
            tto = strptime("%d:59" % (timeSlice + 1), "%H:%M")
            ttotal, tdelayed = self._trainsInTimeSlice(connection, tfrom, tto)
            # filter trains if necessary.
            if trainFilterFunc is not None:
                ttotal = [t for t in ttotal if trainFilterFunc(t)]
                tdelayed = [t for t in tdelayed if trainFilterFunc(t)]
            sumPerProductTotal, products = self._sumProducts(ttotal)
            sumPerProductDelayed, dummy = self._sumProducts(tdelayed)

            for slot, p in enumerate(products):
                st = sumPerProductTotal[p]
                sd = sumPerProductDelayed.get(p, 0)

                # call diagramFunc to retrieve value for this bar
                value = diagramFunc(st, sd)

                # Assign the next unused colour the first time a product
                # shows up. Using the number of products seen so far (and
                # not the slot within this time slice) avoids two products
                # sharing a colour; the modulo avoids an IndexError.
                firstSeen = p not in productColors
                if firstSeen:
                    productColors[p] = barColors[
                            len(productColors) % len(barColors)]

                bar = plot.bar(
                    offset + timeSlice + slot * width, value, width,
                    color=productColors[p])

                # set label for the first time a certain product is processed
                if firstSeen:
                    bar[0].set_label(p)

        plot.legend(loc='upper left')
        plot.set_ylabel(ylabel)
        plot.set_xlabel(self.tr("Time Slice"))
        plot.set_title(title)
        plot.set_xticks([i + width + offset for i in timeRange])
        plot.set_xticklabels(
                [str(f) + ":00\n" + str(f+1) + ":59"
                    for f in timeRange], size='xx-small')

        return plot

    def computePunctuality(self, sumTotal, sumDelayed):
        """Computes the punctuality of all trains within a time slice in %.
        @param sumTotal Total amount of trains within a time slice.
        @param sumDelayed Total amount of delayed trains within a time slice.
        @return Percentage of trains that were not delayed, or 0 if it can
        not be computed (no trains at all or non-numeric input).
        """
        try:
            return 100.0 - float(sumDelayed) / float(sumTotal) * 100
        except (ArithmeticError, TypeError, ValueError):
            # Only swallow what a bad or empty sum can cause; anything else
            # (e.g. KeyboardInterrupt) must propagate.
            return 0

    def computeTotalTrains(self, sumTotal, sumDelayed):
        """Bar value function that simply reports the total train count.
        It exists so that the diagram generators can be handed a function
        with the same signature as computePunctuality.
        @param sumTotal Total amount of trains within a time slice.
        @param sumDelayed Total amount of delayed trains within a time slice.
        Ignored.
        @return sumTotal, unchanged.
        """
        return sumTotal

    def _trainsInTimeSlice(self, connection, tfrom, tto):
        """Selects the trains of a connection that fall into a time slice.
        The candidates are taken from self._trainsTotal and
        self._trainsDelayed.
        @param connection The connection for which the trains should be
        retrieved.
        @param tfrom Start of the time slice.
        @param tto End of the time slice.
        @return Tuple (all matching trains, matching delayed trains).
        """
        timeSlice = (tfrom, tto)

        loggedFilter = TrainFilter()
        loggedFilter.setConnectionFilter(connection)
        loggedFilter.setTimeFilter(timeSlice)

        lateFilter = TrainFilter()
        lateFilter.setConnectionFilter(connection)
        lateFilter.setTimeFilter(timeSlice)
        lateFilter.setDelayFilter(0)

        logged = filter(loggedFilter.trainPassesFilter, self._trainsTotal)
        late = filter(lateFilter.trainPassesFilter, self._trainsDelayed)
        return logged, late

    def _trainsForWeekDay(self, connection, weekDay):
        """Retrieves a list of trains for a given week day.
        The candidates are taken from self._trainsTotal and
        self._trainsDelayed, which _createDiagrams has already restricted to
        the connection (and to delayed trains, respectively).
        @param connection The connection for which the trains should be
        retrieved. Not evaluated here, see above; kept for a signature
        parallel to _trainsInTimeSlice.
        @param weekDay Week day to retrieve trains for, compared against
        tm_wday of the train's dateAsTime().
        @return Tuple (all trains of that day, delayed trains of that day).
        """
        def departsOnWeekDay(train):
            # Trains without a departure time belong to no week day.
            departure = train.dateAsTime()
            return departure is not None and departure.tm_wday == weekDay

        total = [t for t in self._trainsTotal if departsOnWeekDay(t)]
        delayed = [t for t in self._trainsDelayed if departsOnWeekDay(t)]
        return total, delayed

    def _sumProducts(self, trains):
        """Counts how many of the given trains belong to each product.
        @param trains List of trains.
        @return Tuple of two dicts: the first maps every product to the
        number of its trains, the second has every product that occurred as
        key (the value is always 1).
        """
        counts = {}
        seen = {}
        for train in trains:
            product = train.product()
            seen[product] = 1
            counts[product] = counts.get(product, 0) + 1
        return counts, seen

    def _generateReport(self):
        """Creates the report.
        Assembles the TeX source from the individual sections, writes it to
        report.tex in the current working directory and runs latex on it.
        """
        tex = []
        tex.append(r"\documentclass[a4paper,10pt]{article}")
        tex.append(r"\usepackage[utf8x]{inputenc}")
        tex.append(r"\usepackage{longtable}")
        tex.append(r"\usepackage{graphicx}")
        tex.append(r"\usepackage{ngerman}")
        tex.append(
                r"\title{" + self.tr("Deutsche Bahn Punctuality Report") + r"}")
        tex.append(r"\begin{document}")
        tex.append(r"\maketitle")
        tex.append(r"\tableofcontents")

        tex.append(r"\newpage")
        tex.append(r"\section{" + self.tr("Preface") + r"}")
        tex.append(self._getPreface() + r"\\")

        tex.append(r"\newpage")
        tex.append(r"\section{" + self.tr("Surrounding Conditions") + r"}")
        tex.append(self._getSurroundingConditions() + r"\\")

        tex.append(r"\newpage")
        tex.append(r"\section{" + self.tr("Global Overview") + r"}")
        tex.append(self._getGlobalOverview())

        tex.append(r"\newpage")
        tex.append(r"\section{" + self.tr("Detailed Analysis") + r"}")
        tex.append(self._getAnalysis())

        tex.append(r"\newpage")
        tex.append(r"\section{" + self.tr("Appendix") + r"}")

        tex.append(r"\end{document}")
        src = "\n".join(tex)
        # The context manager closes the file even if writing fails, so
        # latex never runs against a handle that is still open.
        with open("report.tex", "w") as f:
            f.write(src)
        os.system('latex ' + "report.tex")
        os.system('latex ' + "report.tex") #compile twice for proper indexing
        # Parenthesised form prints the same text on Python 2 and 3.
        print("Baking pdf...")
        os.system('latex ' + " --output-format=pdf report.tex") #dump pdf

    def _getAnalysis(self):
        """Builds the TeX body of the Detailed Analysis section.
        After an introduction there is one subsection per connection; each
        contains every diagram with its description and, if the user
        accepted the interpretation dialog, the interpretation entered for
        it. Every diagram is followed by a page break.
        @return The TeX source of the section body.
        """
        text = ""
        text += self.tr("This section takes a more detailled look into "
                "train delays.\\\\\nEvery logged travel connection will be "
                "analyzed through two hour time slices globally and for "
                "work days only. The delays will also be analyzed by "
                "week days.\\\n")
        for connection in self._connections:
            heading = "{0}$\\rightarrow${1}".format(
                    connection[0], connection[1])
            text += r"\subsection{" + heading
            text += r"}" + "\n"
            for idx, entry in enumerate(self._diagramnames):
                name = self._getDiagramName(idx, connection)
                text += entry[2] + "\\\\\n"
                text += r"\includegraphics[width=\linewidth]{" + name + r"}"
                text += r"\\\\"
                if self._printInterpretations:
                    note = self._dialogInterpretations.\
                            getDiagramInterpretation(name)
                    # only non-empty interpretations are printed
                    if note is not None and len(note) > 0:
                        text += self._texEscape(note)
                text += "\\\\\\\n"
                text += r"\newpage" + "\n"
        return text

    def _getDiagramName(self, idx, connection):
        """Builds the name of a diagram for a connection.
        @param idx Index of the diagram in self._diagramnames.
        @param connection Connection; its first two items are used.
        @return "<diagram id>_<connection[0]>-<connection[1]>".
        """
        diagramId = self._diagramnames[idx][0]
        start = connection[0]
        destination = connection[1]
        return "{0}_{1}-{2}".format(diagramId, start, destination)

    def _texEscape(self, text):
        """Prepares a text for embedding into the TeX source.
        Currently only line breaks are converted (to a TeX line break
        followed by a newline); all other characters are passed through
        unchanged.
        @param text Text to convert.
        @return The converted text.
        """
        return text.replace("\n", "\\\\\n")

    def _getPreface(self):
        """Returns the (translated) preface text that _generateReport places
        in the Preface section of the report."""
        return self.tr(
            "This report shows the punctuality of all trains between observed "
            "travel connections. The observation was accomplished by querying the"
            " Deutsche Bahn online travel connection service on a regular "
            "basis\\footnote{\\textit{reiseauskunft.bahn.de/bin/query2.exe} was "
            "queried every two minutes}.\\\\\\\\"
            "Describe some more here...")

    def _getSurroundingConditions(self):
        """Returns the TeX text of the Surrounding Conditions section.
        It consists of an introduction naming the (fixed) observation
        period, an itemized list of all connections and a closing paragraph
        on how trains are counted.
        """
        text = self.tr(
            "The this report covers all observed trains of the following travel "
            "connections between {0} and {1}.\\\\\n").format(
                "01.01.2011", "31.12.2011")

        # one list item per observed connection
        text += r"\begin{itemize}" + "\n"
        for connection in self._connections:
            item = r"  \item {0}$\rightarrow${1}".format(
                    connection[0], connection[1])
            text += item + "\n"
        text += r"\end{itemize}" + "\n"

        text += self.tr(
            'A train counts as observed when it appears in the overview that is '
            'returned from the query at reiseauskunft.bahn.de with a valid '
            ' departure time and a valid text phrase in the table cell that tells '
            'about the train\'s delay\\footnote{Text is valid if it says '
            '\'p"unktlich\' or \'+$<$amount of minutes$>$\'}\\\\'
            'Because of the nature of the DB online travel info service only '
            'showing delays greater than five minutes, not all delayed trains '
            'will be marked so. Also trains often have a much greater delay than '
            'the online service will tell. So in reality delays are much worse '
            'than shown in this report.\\\\'
            'Due to the inconsistent delay status of cancelled trains in the '
            'online service cancelled trains are either listed as delayed '
            'or not listed at all.')
        return text

    def _getGlobalOverview(self):
        """Gets the text that describes the global overview about all logged
        connections.
        The text consists of the overall number of logged trains and their
        punctuality, a table with these figures per connection and a table
        with these figures per connection and product.
        Punctuality is obtained from computePunctuality, so a connection
        without any logged train is listed with 0.00 instead of raising a
        ZeroDivisionError.
        @return The TeX source of the section body.
        """
        s = self.tr(
            "This section shows a rough overview about all logged travel "
            "connections.\\\\"
            "A total amount of {0} trains has been logged, {1}\\% of which had "
            "been in time.\\\\"
            "Distribution across the logged travel connections following "
            "yields following result:\\\\")

        # Query the database once per connection and reuse the counts for
        # both the overall figures and the per-connection table.
        perConnection = []
        for c in self._connections:
            logged = len(self._getTotalTrainsLogged(c))
            delayed = len(self._getTotalTrainsDelayed(c))
            perConnection.append((c, logged, delayed))

        t = sum([entry[1] for entry in perConnection])
        ld = sum([entry[2] for entry in perConnection])
        s = s.format(t, "%.02f" % self.computePunctuality(t, ld))

        con = self.tr("Connection")
        total = self.tr("Total Logged")
        inTime = self.tr("\\% In Time")
        s = self._printTableHeader(s, con, total, inTime)
        for c, logged, delayed in perConnection:
            d = self.computePunctuality(logged, delayed)
            s += "%s$\\rightarrow$%s & %d & %.02f"%(c[0], c[1], logged, d)
            s += "\\%\\\\\n"
        s = self._finishTable(s)

        s += self.tr("Dividing these results into the logged products "
            "yields following result:\\\\\n")

        s = self._printTableHeader(s, con, self.tr("Product"), total, inTime)
        products = self._getProductsForMultipleConnections(self._connections)
        for c in self._connections:
            for p in products:
                t = len(self._getTotalTrainsLogged(c, p))
                # products that never occurred on this connection get no row
                if t > 0:
                    ld = len(self._getTotalTrainsDelayed(c, p))
                    d = self.computePunctuality(t, ld)
                    s += "%s$\\rightarrow$%s & %s & %d & %.02f"%(
                            c[0], c[1], p, t, d)
                    s += "\\%\\\\\n"
        s = self._finishTable(s)

        return s

    def _printTableHeader(self, s, *columnHeaders):
        """Tex'es the beginning of a table and its headers.
        Opens a longtable with one right-aligned column per header, writes
        the header row and a double horizontal line.
        @param s Text the table start is appended to.
        @param columnHeaders Titles of the columns, in order.
        @return s with the table start appended.
        """
        s += r"\begin{longtable}{"
        s += "r" * len(columnHeaders)
        s += "}\n"
        # Joining (instead of appending a separator after every header and
        # cutting the last one off) leaves the preamble intact when there
        # are no headers at all.
        s += " & ".join(columnHeaders)
        s += "\\\\\n"
        s += "\\hline\n\\hline\n"
        return s

    def _finishTable(self, s):
        """Closes a longtable.
        @param s Text the table end is appended to.
        @return s followed by the end of the longtable environment and a
        newline.
        """
        return s + r"\end{longtable}" + "\n"

    def _getTotalTrainsLogged(self, connection=None, product=None):
        """Queries the database for the logged trains.
        @param connection If given, only trains of this connection.
        @param product If given, only trains of this product.
        @return The result of self._db.getTrainsFiltered for that filter;
        callers in this class iterate over it and take its len().
        """
        trainFilter = TrainFilter()
        restrictions = (
            (product, trainFilter.setProductFilter),
            (connection, trainFilter.setConnectionFilter),
        )
        for value, applyRestriction in restrictions:
            if value is not None:
                applyRestriction(value)
        return self._db.getTrainsFiltered(trainFilter)

    def _getTotalTrainsDelayed(self, connection=None, product=None):
        """Queries the database for the logged trains that were delayed.
        Works like _getTotalTrainsLogged but additionally sets a delay
        filter of 0 on the query.
        @param connection If given, only trains of this connection.
        @param product If given, only trains of this product.
        @return The result of self._db.getTrainsFiltered for that filter.
        """
        trainFilter = TrainFilter()
        restrictions = (
            (connection, trainFilter.setConnectionFilter),
            (product, trainFilter.setProductFilter),
        )
        for value, applyRestriction in restrictions:
            if value is not None:
                applyRestriction(value)
        trainFilter.setDelayFilter(0)
        return self._db.getTrainsFiltered(trainFilter)

    def _getProductsForConnection(self, connection):
        """Collects the distinct products of all trains logged for a
        connection.
        @param connection Connection whose trains are inspected.
        @return The keys of a dict that has every occurring product as key.
        """
        trains = self._getTotalTrainsLogged(connection)
        # a dict is used as a set: duplicates collapse into one key
        unique = dict.fromkeys(train.product() for train in trains)
        return unique.keys()

    def _getProductsForMultipleConnections(self, connections):
        """Collects the distinct products over several connections.
        @param connections Connections whose products are merged.
        @return The keys of a dict that has every product occurring in any
        of the connections as key.
        """
        unique = dict.fromkeys(
            product
            for connection in connections
            for product in self._getProductsForConnection(connection))
        return unique.keys()