class ReportGroup(group.Group):
    """Form group labelled "Report" that holds the problem-links table.

    The group defines a single field named ``table``; it is not required,
    is shown in display mode and ignores the form context.
    """

    label = _(u"Report")

    # One schema field wrapped as a form field: display-only and
    # context-independent (``ignoreContext=True``).
    fields = field.Fields(
        field.Field(Field(
            __name__="table",
            title=_(u"Problems"),
            description=_(
                u"This table lists the top URLs with a "
                u"bad status. To retry a URL immediately, select "
                u"\"Enqueue\". Each entry expands to display the "
                u"pages that the link appeared on and the location in "
                u"the HTML markup."),
            required=False),
            mode="display", ignoreContext=True),
    )

    # Render the "table" field through the report widget's factory.
    fields["table"].widgetFactory = ReportWidget.factory
def handleClear(self, action):
    """Clear the link checker data and tell the user about it.

    When extracting the form data reports errors, the form's error message
    is stored in ``self.status`` and nothing is cleared.

    :param action: the triggering form action (not used here).
    """
    _extracted, problems = self.extractData()
    if problems:
        self.status = self.formErrorsMessage
        return

    self.tool.clear()
    logger.info("database cleared.")

    status_messages = IStatusMessage(self.request)
    status_messages.addStatusMessage(_(u"All data cleared."), "info")
def handleCrawl(self, action):
    """Run the tool's crawl and tell the user about it.

    When extracting the form data reports errors, the form's error message
    is stored in ``self.status`` and no crawl is started.

    :param action: the triggering form action (not used here).
    """
    _extracted, problems = self.extractData()
    if problems:
        self.status = self.formErrorsMessage
        return

    self.tool.crawl()
    logger.info("crawled the site.")

    status_messages = IStatusMessage(self.request)
    status_messages.addStatusMessage(_(u"All site crawled."), "info")
def handleClear(self, action):
    """Form handler that empties the link checker's stored data.

    The form data is extracted first; on extraction errors only
    ``self.status`` is set to the form's error message.  Otherwise the tool
    is cleared, the event is logged and an "info" status message is queued.

    :param action: the triggering form action (not used here).
    """
    _form_data, extraction_errors = self.extractData()
    if not extraction_errors:
        self.tool.clear()
        logger.info("database cleared.")
        notice = _(u"All data cleared.")
        IStatusMessage(self.request).addStatusMessage(notice, "info")
    else:
        self.status = self.formErrorsMessage
def handleCrawl(self, action):
    """Form handler that asks the link checker tool to crawl.

    The form data is extracted first; on extraction errors only
    ``self.status`` is set to the form's error message.  Otherwise the crawl
    is run, the event is logged and an "info" status message is queued.

    :param action: the triggering form action (not used here).
    """
    _form_data, extraction_errors = self.extractData()
    if not extraction_errors:
        self.tool.crawl()
        logger.info("crawled the site.")
        notice = _(u"All site crawled.")
        IStatusMessage(self.request).addStatusMessage(notice, "info")
    else:
        self.status = self.formErrorsMessage
def valid_auth(value):
    """Constraint: every entry must contain at least two ``|`` characters.

    :param value: iterable of strings to check.
    :returns: ``True`` when all entries pass (including an empty iterable).
    :raises Invalid: as soon as an entry with fewer than two ``|`` is found.
    """
    too_few_separators = any(item.count('|') < 2 for item in value)
    if too_few_separators:
        raise Invalid(_(u"Each entry must contain at least two '|'"))
    return True
class ISettings(Interface):
    """Schema of the link checker's configurable settings.

    Each attribute is a ``zope.schema`` field carrying the title,
    description, requiredness and default shown below.
    """

    report_urls_count = schema.Int(
        title=_(u'Report Urls count'),
        description=_(u'The number of Urls to show in the report view.'),
        required=True,
        default=20,
    )

    concurrency = schema.Int(
        title=_(u'Concurrency'),
        description=_(u'This decides the number of simultaneous downloads.'),
        required=True,
        default=5,
    )

    timeout = schema.Int(
        title=_(u'Timeout'),
        description=_(u'The timeout in seconds. Increase when using a '
                      u'slow network/proxy or link to slow sites.'),
        required=False,
        default=5,
    )

    interval = schema.Int(
        title=_(u'Update interval'),
        description=_(u'The minimum number of hours between checking '
                      u'the same link to update its link validity status.'),
        required=True,
        default=24,
    )

    expiration = schema.Int(
        title=_(u'Expiration'),
        description=_(u'This decides the link expiration threshold. Enter '
                      u'the number of days that a link should be valid '
                      u'after an appearance in the page output.'),
        required=True,
        default=7,
    )

    transaction_size = schema.Int(
        title=_(u'Transaction size'),
        description=_(u'The number of items pulled out of the worker queue '
                      u'for every transaction.'),
        required=True,
        default=100,
    )

    use_publisher = schema.Bool(
        title=_(u'Use publisher'),
        description=_(u"Select this option to publish internal links "
                      u"that have not been requested, and thus have no "
                      u"recorded response status."),
        required=False,
        default=False,
    )

    referers = schema.Int(
        title=_(u'Referer limit'),
        description=_(u"The database will store up to this number "
                      u"of referring links for each entry."),
        required=False,
        default=5,
    )

    ignore_list = schema.Tuple(
        title=_(u'Ignore list'),
        description=_(u'Use regular expressions to prevent links '
                      u'from appearing in the list. One expression per '
                      u'line (e.g. "^http://bit.ly").'),
        required=False,
        value_type=schema.TextLine(),
        default=(
            u"^http://bit.ly",
            u"^http://t.co",
        ),
    )

    # The description used to contain a raw line break inside a
    # single-quoted literal; the sentence is now joined by a plain space.
    check_on_request = schema.Bool(
        title=_(u'Check on every request'),
        description=_(u'Select this option to check the links on every '
                      u'request. When disabled checks will be made only on '
                      u'explicit request.'),
        required=False,
        default=True,
    )

    content_types = schema.Tuple(
        title=_('Content types to check'),
        description=_('Content types to check on crawling and updating'),
        required=False,
        default=(),
        missing_value=(),
        value_type=schema.Choice(
            vocabulary='plone.app.vocabularies.PortalTypes'))

    # NOTE(review): this Choice names its vocabulary through ``source=``
    # while ``content_types`` uses ``vocabulary=`` -- confirm whether the
    # difference is intentional.
    workflow_states = schema.Tuple(
        title=_('Workflow states to check'),
        description=_('Check items in these states on crawling and updating'),
        required=False,
        default=(),
        missing_value=(),
        value_type=schema.Choice(
            source='plone.app.vocabularies.WorkflowStates'))

    # Each entry is validated by ``valid_auth``.
    # NOTE(review): title and description contain spelling errors
    # ("Authentification", "adresses", "caracter").  They are message ids,
    # so correcting them also requires updating the translation catalogs.
    auth_list = schema.Tuple(
        title=_(u'Authentification'),
        description=_(u'Links to adresses which use Basic Auth. Format is '
                      u'URL|USERNAME|PASSWORD separated by "|" (the password '
                      u'can contain that caracter).'),
        value_type=schema.TextLine(),
        default=(),
        required=False,
        constraint=valid_auth,
    )
class ControlPanelEditForm(controlpanel.RegistryEditForm):
    """Link validity control panel: problem report plus ``ISettings`` form.

    On top of the inherited registry edit form the class

    * reacts to ``enqueue`` / ``remove`` request parameters in ``update``,
    * adds "Clear and crawl", "Clear" and "Export as csv" buttons,
    * builds the report rows (``list_entries``) and renders them as RSS.
    """

    schema = ISettings
    fields = field.Fields()
    groups = (
        ReportGroup,
        SettingsGroup,
    )

    label = _(u"Link validity")
    description = _(u"View report and configure operation.")

    # Start from the inherited buttons and a copy of the inherited handlers;
    # both names must exist before the ``buttonAndHandler`` decorators below.
    buttons = button.Buttons()
    buttons += controlpanel.RegistryEditForm.buttons

    handlers = controlpanel.RegistryEditForm.handlers.copy()

    rss_template = ViewPageTemplateFile("templates/rss.pt")

    @property
    def tool(self):
        """Return the ``portal_linkcheck`` tool (looked up on every access)."""
        return getToolByName(self.context, 'portal_linkcheck')

    def update(self):
        """Process ``enqueue`` / ``remove`` requests, then update the form.

        If the request carries an ``enqueue`` (checked first) or ``remove``
        parameter, its URL-decoded value is handed to the tool, the
        transaction is committed and ``Redirect`` to the request URL is
        raised.  Otherwise the inherited ``update`` runs.
        """
        url = self.request.get('enqueue')
        if url is not None:
            url = urllib.unquote_plus(url)
            self.tool.enqueue(url)
            transaction.commit()
            location = self.request.getURL()
            raise Redirect(location)

        url = self.request.get('remove')
        if url is not None:
            url = urllib.unquote_plus(url)
            self.tool.remove(url)
            transaction.commit()
            location = self.request.getURL()
            raise Redirect(location)

        super(ControlPanelEditForm, self).update()

    def get_auth_token(self):
        """Return a SHA-1 hex digest identifying this context's RSS feed.

        The digest covers the context's absolute URL, the ``IKeyManager``
        secret and the literal ``"RSS"``, in that order.
        """
        manager = getUtility(IKeyManager)
        secret = manager.secret()
        sha = hashlib.sha1(self.context.absolute_url())
        sha.update(secret)
        sha.update("RSS")
        return sha.hexdigest()

    def get_modified_date(self):
        """Return a date derived from the tool's persistent data structures.

        NOTE(review): ``min`` selects the *earliest* ``_p_mtime`` of the
        three structures -- confirm that this, rather than the most recent
        modification, is what consumers expect.
        """
        return datetime.date.fromtimestamp(
            min(
                self.tool.index._p_mtime,
                self.tool.links._p_mtime,
                self.tool.checked._p_mtime,
            ))

    def list_entries(self, count=100):
        """Return up to ``count`` report rows for links with a bad status.

        Checked entries are ordered, descending, by
        ``(triage(status), timestamp)``, where entries whose index is in the
        tool's queue are triaged as ``None``.  Entries without a status are
        skipped and iteration stops at the first status 200.

        :param count: maximum number of rows to return.
        :returns: list of dicts with the keys ``url``, ``quoted_url``,
            ``age``, ``date``, ``status``, ``referers`` and ``queued``.
        """
        rows = []

        now = datetime.datetime.now()
        timestamp = int(time.mktime(now.timetuple()))

        # ``self.tool`` performs a tool lookup on every access; resolve it
        # once instead of once per sort key and per row.
        tool = self.tool
        queue = tool.queue
        links = tool.links

        def sort_key(item):
            # A plain function instead of ``lambda (i, entry): ...``: tuple
            # parameters are a syntax error on Python 3.
            i, entry = item
            return (triage(None if i in queue else entry[1]), entry[0])

        entries = list(tool.checked.items())
        entries.sort(key=sort_key, reverse=True)

        settings = self.getContent()

        for i, entry in entries:
            status = entry[1]

            # Skip entries with unknown status.
            if not status:
                continue

            # Break out of iteration when we reach a good status.
            if status == 200:
                break

            # Or hit the maximum row count.
            if len(rows) == count:
                break

            url = links[i]
            age = timestamp - (entry[0] or timestamp)

            # Resolve referer indexes to URLs, drop the unresolved ones and
            # keep at most ``settings.referers`` of them.
            resolved = [ref for ref in map(links.get, entry[2]) if ref]
            referers = resolved[:settings.referers]

            try:
                quoted_url = urllib.quote_plus(url)
            except KeyError:
                # presumably raised for URLs that cannot be quoted (e.g.
                # non-ASCII unicode on Python 2) -- such rows get no
                # quoted URL.
                quoted_url = None

            rows.append({
                'url': url,
                'quoted_url': quoted_url,
                'age': age,
                'date': datetime.datetime.fromtimestamp(entry[0] or timestamp),
                'status': "%d %s" % (status, status_reasons.get(status, '')),
                'referers': referers,
                # NOTE(review): membership is tested with the URL here but
                # with the index ``i`` in the sort key above -- confirm
                # which of the two the queue actually holds.
                'queued': url in queue,
            })

        return rows

    @button.buttonAndHandler(_(u"Clear and crawl"), name='crawl')
    def handleCrawl(self, action):
        """Run the tool's crawl unless form data extraction reports errors."""
        data, errors = self.extractData()
        if errors:
            self.status = self.formErrorsMessage
            return
        self.tool.crawl()
        logger.info("crawled the site.")
        IStatusMessage(self.request).addStatusMessage(
            _(u"All site crawled."), "info")

    @button.buttonAndHandler(_(u"Clear"), name='clear')
    def handleClear(self, action):
        """Clear the tool's data unless form data extraction reports errors."""
        data, errors = self.extractData()
        if errors:
            self.status = self.formErrorsMessage
            return
        self.tool.clear()
        logger.info("database cleared.")
        IStatusMessage(self.request).addStatusMessage(
            _(u"All data cleared."), "info")

    @button.buttonAndHandler(_(u"Export as csv"), name='export_csv')
    def handleExportCSV(self, action):
        """Redirect to the portal's ``@@linkcheck-export`` view (CSV type)."""
        data, errors = self.extractData()
        if errors:
            self.status = self.formErrorsMessage
            return
        portal = api.portal.get()
        return self.request.response.redirect(
            portal.absolute_url() + '/@@linkcheck-export?export_type=csv')

    def RSS(self):
        """Render ``rss_template`` and return it as an RSS attachment.

        Sets the ``Content-Type`` and ``Content-Disposition`` response
        headers after rendering.
        """
        body = self.rss_template()

        self.request.response.setHeader('Content-Type', 'application/rss+xml')
        self.request.response.setHeader(
            'Content-Disposition',
            'attachment; filename="linkcheck.rss"')

        return body

    def crawling_data(self):
        """Return the catalog results for the UIDs in the tool's crawl queue."""
        uids = self.tool.crawl_queue._data
        catalog = api.portal.get_tool('portal_catalog')
        brains = catalog(UID=uids)
        return brains
class SettingsGroup(group.Group):
    """Form group labelled "Settings" built from the ``ISettings`` fields."""

    label = _(u"Settings")

    # One form field per schema field declared on ISettings.
    fields = field.Fields(ISettings)