def webconsole_render(self, wc_request):
    """Build the live-stats page: one table row per spider and a totals row.

    Spiders are the keys of ``self.domains``, listed in sorted order.  Each
    row combines the scraped/crawled counters and start time of the spider's
    stats object with the current sizes of the scheduler's pending queue and
    of the downloader site's ``queue``, ``active`` and ``transferring``
    collections (0 when the spider has no entry there).

    ``wc_request`` is not used.  Returns the page as a string.
    """
    scheduler = scrapyengine.scheduler
    downloader = scrapyengine.downloader

    page = [banner(self)]
    page.append("<table border='1'>\n")
    page.append("<tr><th>Spider</th><th>Items<br>Scraped</th><th>Pages<br>Crawled</th><th>Scheduler<br>Pending</th><th>Downloader<br/>Queued</th><th>Downloader<br/>Active</th><th>Downloader<br/>Transferring</th><th>Start time</th><th>Run time</th></tr>\n")

    # Running sums, kept in the same column order as the numeric cells:
    # scraped, crawled, pending, queued, active, transferring.
    grand_totals = [0, 0, 0, 0, 0, 0]
    spider_count = 0

    for spider in sorted(self.domains):
        if spider in scheduler.pending_requests:
            pending = len(scheduler.pending_requests[spider])
        else:
            pending = 0

        if spider in downloader.sites:
            site = downloader.sites[spider]
            active = len(site.active)
            queued = len(site.queue)
            transferring = len(site.transferring)
        else:
            active = queued = transferring = 0

        spider_stats = self.domains[spider]
        elapsed = datetime.now() - spider_stats.started
        counters = (spider_stats.scraped, spider_stats.crawled,
                    pending, queued, active, transferring)

        page.append(
            '<tr><td>%s</td><td align="right">%d</td><td align="right">%d</td><td align="right">%d</td><td align="right">%d</td><td align="right">%d</td><td align="right">%d</td><td>%s</td><td>%s</td></tr>\n'
            % ((spider.domain_name,) + counters
               + (str(spider_stats.started), str(elapsed)))
        )

        spider_count += 1
        grand_totals = [total + value
                        for total, value in zip(grand_totals, counters)]

    page.append(
        '<tr><td><b>%d domains</b></td><td align="right"><b>%d</b></td><td align="right"><b>%d</b></td><td align="right"><b>%d</b></td><td align="right"><b>%d</b></td><td align="right"><b>%d</b></td><td align="right"><b>%d</b></td><td/><td/></tr>\n'
        % ((spider_count,) + tuple(grand_totals))
    )
    page.append("</table>\n")
    page.append("</body>\n")
    page.append("</html>\n")
    return "".join(page)
def webconsole_render(self, wc_request):
    """Render the engine-status page.

    The text returned by ``get_engine_status()`` is placed inside a
    ``<pre><code>`` block after the banner, followed by the closing
    ``</body>`` and ``</html>`` tags.

    ``wc_request`` is not used.  Returns the page as a string.
    """
    s = banner(self)
    s += "<pre><code>\n"
    # NOTE(review): the status text is inserted as-is, without HTML escaping;
    # confirm it can never contain markup characters.
    s += get_engine_status()
    # Close the elements in the reverse of the order they were opened; the
    # previous "</pre></code>" produced mis-nested markup.
    s += "</code></pre>\n"
    s += "</body>\n"
    s += "</html>\n"
    return s
def webconsole_render(self, wc_request):
    """Render the stats page: a global table, then one table per spider.

    The global section is built from ``stats.get_stats()``; each pair
    yielded by ``stats.iter_spider_stats()`` adds a heading with the
    spider's ``domain_name`` followed by its own table.

    ``wc_request`` is not used.  The assembled page is passed through
    ``str()`` before being returned.
    """
    sections = [
        banner(self),
        "<h3>Global stats</h3>\n",
        stats_html_table(stats.get_stats()),
    ]
    for spider, per_spider in stats.iter_spider_stats():
        sections.append("<h3>%s</h3>\n" % spider.domain_name)
        sections.append(stats_html_table(per_spider))
    sections.append("</body>\n")
    sections.append("</html>\n")
    return str("".join(sections))
def webconsole_render(self, wc_request):
    """Render the scheduler's pending requests as a nested HTML list.

    The outer list has one item per entry of
    ``scrapymanager.engine.scheduler.pending_requests`` showing the key and
    the number of queued requests; the inner list links to every queued
    request URL together with its priority.

    ``wc_request`` is not used.  Returns the page as a string.
    """
    out = [banner(self), "<ul>\n"]
    pending = scrapymanager.engine.scheduler.pending_requests
    for domain, queue in pending.iteritems():
        out.append("<li>\n")
        out.append("%s (<b>%s</b> requests)\n" % (domain, len(queue)))
        out.append("<ul>\n")
        # Each queue entry is unpacked as ((request, <ignored>), priority).
        for (request, _ignored), priority in queue:
            out.append(
                "<li><a href='%s'>%s</a> (priority: %d)</li>\n"
                % (request.url, request.url, priority)
            )
        out.append("</ul>\n")
        out.append("</li>\n")
    out.extend(["</ul>\n", "</body>\n", "</html>\n"])
    return "".join(out)
def webconsole_render(self, wc_request):
    """Render the spider control panel and apply any submitted changes.

    When ``wc_request.args`` is non-empty, ``self.webconsole_control`` is
    called first and the text it returns is appended after the table.

    Side effects: ``self.scheduled`` is recomputed from
    ``scrapymanager.queue.spider_requests`` and ``self.idle`` is recomputed
    as the enabled spiders that are not scheduled, running or finished.

    The page is a four-column table (idle / scheduled / running / finished),
    each column holding a POST form with a multi-select of spider names and
    a submit button.  Returns the page as a string.
    """
    # Apply the requested changes before the lists below are computed so the
    # rendered table reflects the new state.
    if wc_request.args:
        changes = self.webconsole_control(wc_request)

    self.scheduled = [entry[0].name
                      for entry in scrapymanager.queue.spider_requests]
    self.idle = [d for d in self.enabled_spiders
                 if d not in self.scheduled
                 and d not in self.running
                 and d not in self.finished]

    def form_column(field, names, button_label):
        # One table cell: a POST form with a multi-select of *names*,
        # submitted under *field*, and a button labelled *button_label*.
        cell = [
            "<td valign='top'>\n",
            '<form method="post" action=".">\n',
            '<select name="%s" multiple="multiple">\n' % field,
        ]
        cell.extend("<option>%s</option>\n" % name for name in names)
        cell.extend([
            '</select><br>\n',
            '<br />',
            '<input type="submit" value="%s">\n' % button_label,
            '</form>\n',
            "</td>\n",
        ])
        return "".join(cell)

    parts = [banner(self)]
    # The attribute used to be written as border=1" (stray quote), which is
    # malformed HTML.
    parts.append('<table border="1">\n')
    parts.append(
        "<tr><th>Idle (%d)</th><th>Scheduled (%d)</th><th>Running (%d/%d)</th><th>Finished (%d)</th></tr>\n"
        % (len(self.idle), len(self.scheduled), len(self.running),
           settings['CONCURRENT_SPIDERS'], len(self.finished))
    )
    parts.append("<tr>\n")
    parts.append(form_column("add_pending_spiders", sorted(self.idle),
                             "Schedule selected"))
    # Scheduled spiders are deliberately left unsorted, in queue order.
    parts.append(form_column("remove_pending_spiders", self.scheduled,
                             "Remove selected"))
    parts.append(form_column("stop_running_spiders", sorted(self.running),
                             "Stop selected"))
    parts.append(form_column("rerun_finished_spiders", sorted(self.finished),
                             "Re-schedule selected"))
    parts.append("</tr>\n")
    parts.append("</table>\n")

    if wc_request.args:
        parts.append(changes)

    parts.append("</body>\n")
    parts.append("</html>\n")
    return "".join(parts)