Пример #1
0
    def webconsole_render(self, wc_request):
        """Render an HTML table with one row of counters per tracked spider.

        For every key of ``self.domains`` (iterated in sorted order) the row
        shows the scraped/crawled counters stored in ``self.domains``, the
        number of requests pending in the engine scheduler, the sizes of the
        downloader's queue/active/transferring collections for that spider,
        the start time and the elapsed run time. A final row holds the totals.

        ``wc_request`` is accepted for interface compatibility and is not
        used. Returns the page as a string, starting with ``banner(self)``.
        """
        scheduler = scrapyengine.scheduler
        downloader = scrapyengine.downloader

        # Running totals for the summary row at the bottom of the table.
        n_spiders = 0
        sum_scraped = sum_crawled = sum_scheduled = 0
        sum_active = sum_queued = sum_transferring = 0

        page = [
            banner(self),
            "<table border='1'>\n",
            "<tr><th>Spider</th><th>Items<br>Scraped</th><th>Pages<br>Crawled</th><th>Scheduler<br>Pending</th><th>Downloader<br/>Queued</th><th>Downloader<br/>Active</th><th>Downloader<br/>Transferring</th><th>Start time</th><th>Run time</th></tr>\n",
        ]

        for spider in sorted(self.domains):
            pending = scheduler.pending_requests
            n_scheduled = len(pending[spider]) if spider in pending else 0

            # A spider the downloader does not know about counts as zero
            # in all three downloader columns.
            if spider in downloader.sites:
                site = downloader.sites[spider]
                n_active = len(site.active)
                n_queued = len(site.queue)
                n_transferring = len(site.transferring)
            else:
                n_active = n_queued = n_transferring = 0

            spider_stats = self.domains[spider]
            elapsed = datetime.now() - spider_stats.started

            page.append(
                '<tr><td>%s</td><td align="right">%d</td><td align="right">%d</td><td align="right">%d</td><td align="right">%d</td><td align="right">%d</td><td align="right">%d</td><td>%s</td><td>%s</td></tr>\n' % (
                    spider.domain_name,
                    spider_stats.scraped,
                    spider_stats.crawled,
                    n_scheduled,
                    n_queued,
                    n_active,
                    n_transferring,
                    str(spider_stats.started),
                    str(elapsed),
                )
            )

            n_spiders += 1
            sum_scraped += spider_stats.scraped
            sum_crawled += spider_stats.crawled
            sum_scheduled += n_scheduled
            sum_active += n_active
            sum_queued += n_queued
            sum_transferring += n_transferring

        page.append(
            '<tr><td><b>%d domains</b></td><td align="right"><b>%d</b></td><td align="right"><b>%d</b></td><td align="right"><b>%d</b></td><td align="right"><b>%d</b></td><td align="right"><b>%d</b></td><td align="right"><b>%d</b></td><td/><td/></tr>\n' % (
                n_spiders,
                sum_scraped,
                sum_crawled,
                sum_scheduled,
                sum_queued,
                sum_active,
                sum_transferring,
            )
        )
        page.append("</table>\n")
        page.append("</body>\n")
        page.append("</html>\n")

        return "".join(page)
Пример #2
0
    def webconsole_render(self, wc_request):
        """Render the engine status report as a preformatted HTML page.

        The page is ``banner(self)`` followed by the text returned by
        ``get_engine_status()`` wrapped in ``<pre><code>``, then the closing
        ``</body>``/``</html>`` tags. ``wc_request`` is accepted for interface
        compatibility and is not used. Returns the page as a string.
        """
        s = banner(self)
        s += "<pre><code>\n"
        # NOTE(review): the status text is inserted verbatim; if it can contain
        # '<' or '&' it should be HTML-escaped — confirm against
        # get_engine_status().
        s += get_engine_status()
        # Close in reverse order of opening: <code> is nested inside <pre>.
        s += "</code></pre>\n"
        s += "</body>\n"
        s += "</html>\n"

        return s
Пример #3
0
    def webconsole_render(self, wc_request):
        """Render the global stats followed by one stats table per spider.

        The page is ``banner(self)``, a "Global stats" heading with the table
        built from ``stats.get_stats()``, then for each pair yielded by
        ``stats.iter_spider_stats()`` a heading with the spider's
        ``domain_name`` and its own table. ``wc_request`` is accepted for
        interface compatibility and is not used.

        Returns the page coerced with ``str()``.
        """
        chunks = [
            banner(self),
            "<h3>Global stats</h3>\n",
            stats_html_table(stats.get_stats()),
        ]

        for spider, spider_stats in stats.iter_spider_stats():
            heading = "<h3>%s</h3>\n" % spider.domain_name
            chunks.append(heading)
            chunks.append(stats_html_table(spider_stats))

        chunks.extend(("</body>\n", "</html>\n"))

        html = "".join(chunks)
        return str(html)
Пример #4
0
    def webconsole_render(self, wc_request):
        """Render the scheduler's pending requests as nested HTML lists.

        The outer list has one item per entry of
        ``scrapymanager.engine.scheduler.pending_requests``, showing the key
        and the number of queued requests; the inner list links to every
        queued request URL together with its priority. ``wc_request`` is
        accepted for interface compatibility and is not used.

        Request URLs are HTML-escaped before being written, both as the link
        target and as the link text, so a URL containing quotes, ``&`` or
        ``<`` cannot break or inject markup.

        Returns the page as a string, starting with ``banner(self)``.
        """
        # Function-scope import: available in the standard library of both
        # Python 2 and Python 3, and keeps this method self-contained.
        from xml.sax.saxutils import escape

        # Extra entities so an escaped URL is also safe inside a quoted
        # attribute value (escape() alone only handles &, < and >).
        attr_entities = {"'": "&#39;", '"': "&quot;"}

        s = banner(self)
        s += "<ul>\n"
        # .items() exists on both Python 2 and 3 mappings (.iteritems() is
        # Python 2 only).
        for domain, request_queue in scrapymanager.engine.scheduler.pending_requests.items():
            s += "<li>\n"
            s += "%s (<b>%s</b> requests)\n" % (domain, len(request_queue))
            s += "<ul>\n"
            # Each queue entry is unpacked as ((request, <ignored>), priority).
            for ((req, _), prio) in request_queue:
                s += "<li><a href='%s'>%s</a> (priority: %d)</li>\n" % (
                    escape(req.url, attr_entities),
                    escape(req.url),
                    prio,
                )
            s += "</ul>\n"
            s += "</li>\n"
        s += "</ul>\n"

        s += "</body>\n"
        s += "</html>\n"

        return s
Пример #5
0
    def webconsole_render(self, wc_request):
        """Render the spider control panel and apply any submitted changes.

        When ``wc_request.args`` is non-empty, ``self.webconsole_control`` is
        called first and its return value is appended below the table.

        Side effects: ``self.scheduled`` is rebuilt from
        ``scrapymanager.queue.spider_requests`` and ``self.idle`` is rebuilt
        as the enabled spiders that are neither scheduled, running nor
        finished.

        The page contains a four-column table (idle, scheduled, running,
        finished); each column is a form with a multi-select of spider names
        and a submit button. Returns the page as a string, starting with
        ``banner(self)``.
        """
        if wc_request.args:
            changes = self.webconsole_control(wc_request)

        self.scheduled = [entry[0].name for entry in scrapymanager.queue.spider_requests]
        self.idle = [d for d in self.enabled_spiders if d not in self.scheduled
                                                        and d not in self.running
                                                        and d not in self.finished]

        s = banner(self)
        # The attribute value is quoted on both sides; the original markup had
        # only a stray closing quote.
        s += '<table border="1">\n'
        s += "<tr><th>Idle (%d)</th><th>Scheduled (%d)</th><th>Running (%d/%d)</th><th>Finished (%d)</th></tr>\n" % \
                (len(self.idle),
                 len(self.scheduled),
                 len(self.running),
                 settings['CONCURRENT_SPIDERS'],
                 len(self.finished))
        s += "<tr>\n"

        # One entry per column: (form field name, spider names, button label).
        # The scheduled column keeps the order of self.scheduled as built
        # above; the other columns are sorted.
        columns = (
            ("add_pending_spiders", sorted(self.idle), "Schedule selected"),
            ("remove_pending_spiders", self.scheduled, "Remove selected"),
            ("stop_running_spiders", sorted(self.running), "Stop selected"),
            ("rerun_finished_spiders", sorted(self.finished), "Re-schedule selected"),
        )
        for field_name, names, button_label in columns:
            s += "<td valign='top'>\n"
            s += '<form method="post" action=".">\n'
            s += '<select name="%s" multiple="multiple">\n' % field_name
            for name in names:
                s += "<option>%s</option>\n" % name
            s += '</select><br>\n'
            s += '<br />'
            s += '<input type="submit" value="%s">\n' % button_label
            s += '</form>\n'
            s += "</td>\n"

        s += "</tr>\n"
        s += "</table>\n"

        if wc_request.args:
            s += changes

        s += "</body>\n"
        s += "</html>\n"

        return s