Пример #1
0
class ReportGroup(group.Group):
    """Form group labelled "Report" that holds the problem table.

    The group consists of one optional ``table`` field which is shown in
    display mode, ignores the form context and is rendered through
    ``ReportWidget``.
    """

    label = _(u"Report")

    fields = field.Fields(
        field.Field(
            Field(
                __name__="table",
                title=_(u"Problems"),
                description=_(
                    u"This table lists the top URLs with a bad status. "
                    u"To retry a URL immediately, select \"Enqueue\". "
                    u"Each entry expands to display the pages that the link "
                    u"appeared on and the location in the HTML markup."
                ),
                required=False,
            ),
            mode="display",
            ignoreContext=True,
        ),
    )

    # Render the table field with the report widget.
    fields["table"].widgetFactory = ReportWidget.factory
Пример #2
0
    def handleClear(self, action):
        """Clear the link checker data and report it to the user.

        When the submitted form data does not validate, the form error
        message is stored in ``self.status`` and nothing is cleared.
        """
        _data, problems = self.extractData()
        if not problems:
            self.tool.clear()
            logger.info("database cleared.")
            messages = IStatusMessage(self.request)
            messages.addStatusMessage(_(u"All data cleared."), "info")
        else:
            self.status = self.formErrorsMessage
    def handleCrawl(self, action):
        """Start a crawl through the tool and report it to the user.

        When the submitted form data does not validate, the form error
        message is stored in ``self.status`` and no crawl is started.
        """
        _data, problems = self.extractData()
        if not problems:
            self.tool.crawl()
            logger.info("crawled the site.")
            messages = IStatusMessage(self.request)
            messages.addStatusMessage(_(u"All site crawled."), "info")
        else:
            self.status = self.formErrorsMessage
Пример #4
0
    def handleClear(self, action):
        """Button handler that wipes the data kept by the tool.

        Invalid form input short-circuits the handler: the form error
        message becomes the status and the tool is left untouched.
        """
        extracted = self.extractData()
        if extracted[1]:
            self.status = self.formErrorsMessage
            return None

        self.tool.clear()
        logger.info("database cleared.")

        # Tell the user about the successful operation.
        notifier = IStatusMessage(self.request)
        notifier.addStatusMessage(_(u"All data cleared."), "info")
Пример #5
0
    def handleCrawl(self, action):
        """Button handler that asks the tool to crawl.

        Invalid form input short-circuits the handler: the form error
        message becomes the status and no crawl is requested.
        """
        extracted = self.extractData()
        if extracted[1]:
            self.status = self.formErrorsMessage
            return None

        self.tool.crawl()
        logger.info("crawled the site.")

        # Tell the user about the successful operation.
        notifier = IStatusMessage(self.request)
        notifier.addStatusMessage(_(u"All site crawled."), "info")
Пример #6
0
def valid_auth(value):
    """Check that every entry contains at least two ``|`` characters.

    Returns ``True`` when all entries pass (an empty sequence passes as
    well) and raises ``Invalid`` as soon as an entry with fewer than two
    separators is found.
    """
    has_short_entry = any(item.count('|') < 2 for item in value)
    if has_short_entry:
        raise Invalid(_(u"Each entry must contain at least two '|'"))
    return True
Пример #7
0
class ISettings(Interface):
    """Configuration schema of the link checker."""

    # --- Report -----------------------------------------------------------

    report_urls_count = schema.Int(
        title=_(u'Report Urls count'),
        description=_(u'The number of Urls to show in the report view.'),
        required=True,
        default=20,
    )

    # --- Downloading and scheduling ---------------------------------------

    concurrency = schema.Int(
        title=_(u'Concurrency'),
        description=_(u'This decides the number of simultaneous downloads.'),
        required=True,
        default=5,
    )

    timeout = schema.Int(
        title=_(u'Timeout'),
        description=_(
            u'The timeout in seconds. Increase when using a slow '
            u'network/proxy or link to slow sites.'
        ),
        required=False,
        default=5,
    )

    # Expressed in hours.
    interval = schema.Int(
        title=_(u'Update interval'),
        description=_(
            u'The minimum number of hours between checking the same '
            u'link to update its link validity status.'
        ),
        required=True,
        default=24,
    )

    # Expressed in days.
    expiration = schema.Int(
        title=_(u'Expiration'),
        description=_(
            u'This decides the link expiration threshold. '
            u'Enter the number of days that a link should be valid after '
            u'an appearance in the page output.'
        ),
        required=True,
        default=7,
    )

    transaction_size = schema.Int(
        title=_(u'Transaction size'),
        description=_(
            u'The number of items pulled out of the worker '
            u'queue for every transaction.'
        ),
        required=True,
        default=100,
    )

    use_publisher = schema.Bool(
        title=_(u'Use publisher'),
        description=_(
            u"Select this option to publish internal links that have "
            u"not been requested, and thus have no recorded response "
            u"status."
        ),
        required=False,
        default=False,
    )

    # --- Stored data and filtering ----------------------------------------

    referers = schema.Int(
        title=_(u'Referer limit'),
        description=_(
            u"The database will store up to this number of "
            u"referring links for each entry."
        ),
        required=False,
        default=5,
    )

    # One regular expression per entry.
    ignore_list = schema.Tuple(
        title=_(u'Ignore list'),
        description=_(
            u'Use regular expressions to prevent links from appearing '
            u'in the list. One expression per line '
            u'(e.g. "^http://bit.ly").'
        ),
        required=False,
        value_type=schema.TextLine(),
        default=(
            u"^http://bit.ly",
            u"^http://t.co",
        ),
    )

    check_on_request = schema.Bool(
        title=_(u'Check on every request'),
        description=_(
            u'Select this option to check the links on every request. '
            u'When disabled checks will be made only on explicit request.'
        ),
        required=False,
        default=True,
    )

    # --- Crawl scope ------------------------------------------------------

    content_types = schema.Tuple(
        title=_('Content types to check'),
        description=_('Content types to check on crawling and updating'),
        required=False,
        default=(),
        missing_value=(),
        value_type=schema.Choice(
            vocabulary='plone.app.vocabularies.PortalTypes',
        ),
    )

    workflow_states = schema.Tuple(
        title=_('Workflow states to check'),
        description=_('Check items in these states on crawling and updating'),
        required=False,
        default=(),
        missing_value=(),
        value_type=schema.Choice(
            source='plone.app.vocabularies.WorkflowStates',
        ),
    )

    # --- Authentication ---------------------------------------------------

    # Every entry is checked by ``valid_auth``.
    auth_list = schema.Tuple(
        title=_(u'Authentification'),
        description=_(
            u'Links to adresses which use Basic Auth. '
            u'Format is URL|USERNAME|PASSWORD separated by "|" '
            u'(the password can contain that caracter).'
        ),
        value_type=schema.TextLine(),
        default=(),
        required=False,
        constraint=valid_auth,
    )
Пример #8
0
def valid_auth(value):
    """Validate a sequence of ``|``-separated entries.

    An entry is acceptable when it holds two or more ``|`` characters.
    The first unacceptable entry raises ``Invalid``; otherwise ``True``
    is returned.
    """
    for candidate in value:
        separators = candidate.count('|')
        if separators >= 2:
            continue
        raise Invalid(_(u"Each entry must contain at least two '|'"))
    return True
Пример #9
0
class ControlPanelEditForm(controlpanel.RegistryEditForm):
    """Registry edit form showing the link report next to the settings.

    Besides the regular form handling it serves the ``enqueue`` and
    ``remove`` request parameters, builds the rows of the problem
    report, renders the RSS view and offers the crawl, clear and CSV
    export buttons.
    """

    schema = ISettings
    fields = field.Fields()
    groups = (ReportGroup, SettingsGroup)

    label = _(u"Link validity")
    description = _(u"View report and configure operation.")

    # Start from the inherited buttons and handlers. The decorated
    # handlers further down are declared after these names exist; keep
    # their order, it presumably determines the button order.
    buttons = button.Buttons()
    buttons += controlpanel.RegistryEditForm.buttons
    handlers = controlpanel.RegistryEditForm.handlers.copy()

    rss_template = ViewPageTemplateFile("templates/rss.pt")

    @property
    def tool(self):
        """The ``portal_linkcheck`` tool looked up from the context."""
        return getToolByName(self.context, 'portal_linkcheck')

    def update(self):
        """Serve ``enqueue``/``remove`` requests, else update the form.

        When one of the two request parameters is present, its unquoted
        value is passed to the tool method of the same name, the
        transaction is committed and a ``Redirect`` to the request URL is
        raised. ``enqueue`` takes precedence over ``remove``.
        """
        for operation in ('enqueue', 'remove'):
            raw = self.request.get(operation)
            if raw is None:
                continue
            target = urllib.unquote_plus(raw)
            getattr(self.tool, operation)(target)
            transaction.commit()
            raise Redirect(self.request.getURL())

        super(ControlPanelEditForm, self).update()

    def get_auth_token(self):
        """Return a SHA-1 hex digest used as token for the RSS view.

        The digest covers the context URL, the key manager secret and
        the literal ``"RSS"``, in that order.
        """
        secret = getUtility(IKeyManager).secret()
        digest = hashlib.sha1(self.context.absolute_url())
        for chunk in (secret, "RSS"):
            digest.update(chunk)
        return digest.hexdigest()

    def get_modified_date(self):
        """Return the date of the smallest ``_p_mtime`` of the tool data.

        NOTE(review): ``min`` yields the oldest of the three timestamps;
        confirm that this, rather than the most recent one, is intended.
        """
        tool = self.tool
        stamps = (
            tool.index._p_mtime,
            tool.links._p_mtime,
            tool.checked._p_mtime,
        )
        return datetime.date.fromtimestamp(min(stamps))

    def list_entries(self, count=100):
        """Build at most ``count`` report rows from the checked entries.

        Entries are ordered descending by ``(triage(status), entry[0])``,
        where entries whose index is in the tool queue are triaged with a
        status of ``None``. Entries with a false status are skipped and
        the walk stops at the first entry with status 200.

        Each row is a dict with the keys ``url``, ``quoted_url``, ``age``,
        ``date``, ``status``, ``referers`` and ``queued``.
        """
        current = datetime.datetime.now()
        timestamp = int(time.mktime(current.timetuple()))

        def priority(item):
            # Sort key: queued entries are triaged as "no status".
            index, entry = item
            effective = None if index in self.tool.queue else entry[1]
            return triage(effective), entry[0]

        ordered = sorted(
            self.tool.checked.items(), key=priority, reverse=True)

        settings = self.getContent()
        rows = []

        for index, entry in ordered:
            status = entry[1]

            # Entries with unknown status are not reported.
            if not status:
                continue

            # Stop at the first good status or when the limit is hit.
            if status == 200 or len(rows) == count:
                break

            url = self.tool.links[index]
            checked_at = entry[0] or timestamp

            # Resolve the referring links, drop unresolved ones and cap
            # the list at the configured referer limit.
            lookup = self.tool.links.get
            resolved = [lookup(referer) for referer in entry[2]]
            referers = [link for link in resolved if link]
            referers = referers[:settings.referers]

            try:
                quoted_url = urllib.quote_plus(url)
            except KeyError:
                # Presumably raised for URLs that cannot be quoted (e.g.
                # non-ASCII unicode on Python 2); such rows get no
                # quoted URL.
                quoted_url = None

            row = {
                'url': url,
                'quoted_url': quoted_url,
                'age': timestamp - checked_at,
                'date': datetime.datetime.fromtimestamp(checked_at),
                'status': "%d %s" % (status, status_reasons.get(status, '')),
                'referers': referers,
                # NOTE(review): the sort key tests the entry *index*
                # against the queue while this tests the *url* - confirm
                # which one the queue actually holds.
                'queued': url in self.tool.queue,
            }
            rows.append(row)

        return rows

    @button.buttonAndHandler(_(u"Clear and crawl"), name='crawl')
    def handleCrawl(self, action):
        """Ask the tool to crawl, unless the form data has errors."""
        _data, problems = self.extractData()
        if not problems:
            self.tool.crawl()
            logger.info("crawled the site.")
            messages = IStatusMessage(self.request)
            messages.addStatusMessage(_(u"All site crawled."), "info")
        else:
            self.status = self.formErrorsMessage

    @button.buttonAndHandler(_(u"Clear"), name='clear')
    def handleClear(self, action):
        """Clear the tool data, unless the form data has errors."""
        _data, problems = self.extractData()
        if not problems:
            self.tool.clear()
            logger.info("database cleared.")
            messages = IStatusMessage(self.request)
            messages.addStatusMessage(_(u"All data cleared."), "info")
        else:
            self.status = self.formErrorsMessage

    @button.buttonAndHandler(_(u"Export as csv"), name='export_csv')
    def handleExportCSV(self, action):
        """Redirect to the CSV export view, unless the form has errors."""
        _data, problems = self.extractData()
        if problems:
            self.status = self.formErrorsMessage
            return None

        site_url = api.portal.get().absolute_url()
        export_url = site_url + '/@@linkcheck-export?export_type=csv'
        return self.request.response.redirect(export_url)

    def RSS(self):
        """Render the RSS template and mark the response as a download."""
        body = self.rss_template()

        response = self.request.response
        response.setHeader('Content-Type', 'application/rss+xml')
        response.setHeader(
            'Content-Disposition', 'attachment; filename="linkcheck.rss"')

        return body

    def crawling_data(self):
        """Return the catalog brains for the UIDs in the crawl queue."""
        queued_uids = self.tool.crawl_queue._data
        return api.portal.get_tool('portal_catalog')(UID=queued_uids)
Пример #10
0
class SettingsGroup(group.Group):
    """Form group labelled "Settings" built from the ``ISettings`` schema."""

    fields = field.Fields(ISettings)
    label = _(u"Settings")