Example #1
def generate(crawler):
    """Output the list of not checked pages."""
    session = Session()
    links = session.query(Link).filter(Link.yanked.isnot(None)).order_by(Link.url)
    links = links.options(joinedload(Link.linkproblems))
    render(__outputfile__, crawler=crawler, title=__title__, links=links)
    session.close()
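Every report in this listing follows the same shape: open a Session, build a query over Link, hand the result to render() together with the module globals __title__ and __outputfile__, and close the session. None of those names are defined in the examples themselves. Below is a minimal sketch of plausible definitions, assuming SQLAlchemy; the model columns are reconstructed from how the queries use them, not copied from webcheck:

from sqlalchemy import (Boolean, Column, DateTime, ForeignKey, Integer,
                        String, create_engine)
from sqlalchemy.orm import (declarative_base, joinedload, relationship,
                            sessionmaker)
from sqlalchemy.sql.functions import char_length  # used by the title reports


Base = declarative_base()


class Link(Base):
    """One crawled URL, with the columns the report queries reference."""
    __tablename__ = 'links'
    id = Column(Integer, primary_key=True)
    url = Column(String, unique=True)
    title = Column(String)
    author = Column(String)
    mimetype = Column(String)
    mtime = Column(DateTime)          # last modification time of the page
    is_page = Column(Boolean)
    is_internal = Column(Boolean)
    yanked = Column(String)           # reason the URL was skipped, if any
    total_size = Column(Integer)
    linkproblems = relationship('LinkProblem', backref='link')
    pageproblems = relationship('PageProblem', backref='link')


class LinkProblem(Base):
    """A problem found while retrieving the link (e.g. a broken target)."""
    __tablename__ = 'linkproblems'
    id = Column(Integer, primary_key=True)
    link_id = Column(Integer, ForeignKey('links.id'))
    message = Column(String)


class PageProblem(Base):
    """A problem found while parsing the page content."""
    __tablename__ = 'pageproblems'
    id = Column(Integer, primary_key=True)
    link_id = Column(Integer, ForeignKey('links.id'))
    message = Column(String)


engine = create_engine('sqlite:///:memory:')
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)


def render(outputfile, **context):
    """Stand-in for the template-driven report writer the plugins call."""
    print('would render %s with %s' % (outputfile, sorted(context)))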
Example #2
def generate(crawler):
    """Output a sorted list of URLs."""
    session = Session()
    links = session.query(Link).order_by(Link.url)
    render(__outputfile__, crawler=crawler, title=__title__,
           links=links)
    session.close()
Example #3
def generate(crawler):
    """Present the list of bad links."""
    session = Session()
    links = session.query(Link).filter(Link.linkproblems.any())
    links = links.order_by(Link.url).options(joinedload(Link.linkproblems))
    render(__outputfile__, crawler=crawler, title=__title__, links=links)
    session.close()
Example #4
def generate(crawler):
    """Output the list of pages without a title."""
    session = Session()
    links = session.query(Link).filter_by(is_page=True, is_internal=True)
    links = links.filter((char_length(Link.title) == 0)
                         | Link.title.is_(None)).order_by(Link.url)
    render(__outputfile__, crawler=crawler, title=__title__, links=links)
    session.close()
Example #5
def generate(crawler):
    """Generate the list of external links."""
    session = Session()
    links = session.query(Link).filter(Link.is_internal != True).order_by(Link.url)
    links = links.options(joinedload(Link.linkproblems))
    render(__outputfile__, crawler=crawler, title=__title__,
           links=links)
    session.close()
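One caveat about the comparison above: in SQL, is_internal != TRUE never matches rows where the column is NULL. The images report (Example #10) guards the analogous is_page test with an explicit NULL check; if is_internal can be unset, the same pattern would be needed here. A hedged variant, not the original webcheck code:

links = session.query(Link).filter(
    (Link.is_internal != True) | Link.is_internal.is_(None)
).order_by(Link.url)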
Example #6
def generate(crawler):
    """Output the list of outdated pages to the specified file descriptor."""
    session = Session()
    oldtime = datetime.datetime.now() - datetime.timedelta(days=config.REPORT_WHATSOLD_URL_AGE)
    links = session.query(Link).filter_by(is_page=True, is_internal=True)
    links = links.filter(Link.mtime < oldtime).order_by(Link.mtime)
    render(__outputfile__, crawler=crawler, title=__title__,
           links=links, now=datetime.datetime.now())
    session.close()
Example #7
def generate(crawler):
    """Output the list of recently modified pages."""
    session = Session()
    newtime = datetime.datetime.now() - datetime.timedelta(days=config.REPORT_WHATSNEW_URL_AGE)
    links = session.query(Link).filter_by(is_page=True, is_internal=True)
    links = links.filter(Link.mtime > newtime).order_by(Link.mtime.desc())
    render(__outputfile__, crawler=crawler, title=__title__,
           links=links, now=datetime.datetime.now())
    session.close()
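The two reports above (Examples #6 and #7) are mirror images: both restrict to internal pages and differ only in the direction of the mtime comparison and the sort order. A hypothetical helper could serve both; this is a sketch, not part of webcheck:

def pages_by_age(session, days, newer=True):
    """Internal pages modified after (newer=True) or before a cutoff."""
    cutoff = datetime.datetime.now() - datetime.timedelta(days=days)
    query = session.query(Link).filter_by(is_page=True, is_internal=True)
    if newer:
        return query.filter(Link.mtime > cutoff).order_by(Link.mtime.desc())
    return query.filter(Link.mtime < cutoff).order_by(Link.mtime)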
Example #8
def generate(crawler):
    """Output the sitemap."""
    session = Session()
    links = [
        session.query(Link).filter_by(url=url).first()
        for url in crawler.base_urls
    ]
    links = explore(links)
    render(__outputfile__, crawler=crawler, title=__title__, links=links)
    session.close()
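explore() is defined elsewhere in webcheck. As a rough illustration of the kind of traversal a sitemap needs, here is a hypothetical breadth-first walk; the children attribute on Link is an assumption the examples do not show:

import collections


def explore(start_links):
    """Hypothetical sketch: pages reachable from the base URLs, each once."""
    seen = set()
    result = []
    queue = collections.deque(link for link in start_links if link is not None)
    while queue:
        link = queue.popleft()
        if link.url in seen:
            continue
        seen.add(link.url)
        result.append(link)
        queue.extend(getattr(link, 'children', []))
    return result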
Example #9
def generate(crawler):
    """Output the list of large pages."""
    session = Session()
    links = session.query(Link).filter_by(is_page=True, is_internal=True)
    links = [x for x in links
             if get_size(x) >= config.REPORT_SLOW_URL_SIZE * 1024]
    links.sort(key=lambda x: x.total_size, reverse=True)
    render(__outputfile__, crawler=crawler, title=__title__,
           links=links)
    session.close()
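get_size() likewise comes from the surrounding plugin code; judging by the sort key, it computes and caches total_size. A hypothetical recursive version, assuming size and embedded attributes on Link that the examples do not show:

def get_size(link, seen=None):
    """Hypothetical sketch: page size plus everything it embeds, memoized."""
    seen = seen if seen is not None else set()
    if link.url in seen:
        return 0
    seen.add(link.url)
    total = getattr(link, 'size', 0) or 0
    for embedded in getattr(link, 'embedded', []):
        total += get_size(embedded, seen)
    link.total_size = total
    return total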
Example #10
def generate(crawler):
    """Generate a list of image URLs that were found."""
    session = Session()
    # get non-page links that have an image/* mimetype
    links = session.query(Link)
    links = links.filter((Link.is_page != True) | Link.is_page.is_(None))
    links = links.filter(Link.mimetype.startswith('image/'))
    links = links.order_by(Link.url)
    render(__outputfile__, crawler=crawler, title=__title__, links=links)
    session.close()
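SQLAlchemy's startswith() on a column compiles to a LIKE with a trailing wildcard, so the mimetype filter above is equivalent to spelling the pattern out by hand:

# Equivalent spelling of the startswith() filter above.
links = links.filter(Link.mimetype.like('image/%'))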
Example #11
def generate(crawler):
    """Output the overview of problems per author."""
    session = Session()
    # make a list of problems per author
    problem_db = collections.defaultdict(list)
    # get internal links with page problems
    links = session.query(Link).filter_by(is_internal=True)
    links = links.filter(Link.pageproblems.any()).order_by(Link.url)
    for link in links:
        author = link.author.strip() if link.author else 'Unknown'
        problem_db[author].append(link)
    # get a sorted list of authors
    authors = sorted(problem_db)
    authors = [(x, problem_db[x]) for x in authors]
    render(__outputfile__, crawler=crawler, title=__title__,
           authors=authors, mk_id=mk_id)
    session.close()
Example #12
def generate(crawler):
    """Output a list of modules, it's authors and the webcheck version."""
    session = Session()
    render(__outputfile__, crawler=crawler, title=__title__,
           numlinks=session.query(Link).count())
    session.close()
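All of these reports hang off the same plugin contract: a module exposes __title__, __outputfile__ and a generate(crawler) entry point, and the crawler runs each enabled report once the crawl finishes. A hypothetical driver loop; the package path and plugin names are assumptions for illustration:

import importlib


def run_reports(crawler, names=('urllist', 'badlinks', 'images', 'sitemap')):
    """Hypothetical sketch: import each report module and run generate()."""
    for name in names:
        plugin = importlib.import_module('plugins.' + name)
        plugin.generate(crawler)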