Пример #1
0
def download_articles(parameters):
    """Download the articles linked from every search page and store them.

    For each URL produced by ``url.prepare_urls(parameters)`` a
    ``pages.SearchPage`` is built; every entry of its ``links`` is loaded as
    a ``pages.ArticlePage``, converted with ``convert.convert_article`` and
    added to a session obtained from ``models.Session()``.  The session is
    committed once per search page, and progress is printed to stdout.

    Args:
        parameters: mapping of search options.  The optional key
            ``'start-page'`` gives the number shown for the first page in
            the progress output (defaults to 1); the whole mapping is
            also handed to ``url.prepare_urls``.

    Returns:
        None.
    """
    session = models.Session()
    urls = url.prepare_urls(parameters)
    # Only used for progress messages in this function.
    page = parameters.get('start-page', 1)
    # Number of records added so far; the running 1-based label of the
    # article being fetched is derived from it, so the "Saved" message
    # reports the real count instead of count + 1.
    saved = 0

    for u in urls:
        print("Getting page %d: %s ..." % (page, u))
        search_page = pages.SearchPage(u)

        for link in search_page.links:
            print("%d: Getting %s ..." % (saved + 1, link))
            article_page = pages.ArticlePage(link)
            db_article = convert.convert_article(article_page)
            session.add(db_article)
            saved += 1

        # Commit once per search page so finished pages are committed
        # before the next page is fetched.
        session.commit()
        print("Saved %d records; page %d finished." % (saved, page))
        page += 1

    # Nothing has been added since the last per-page commit; kept so the
    # function still ends with a commit when ``urls`` is empty.
    session.commit()
    print("Saved.")
Пример #2
0
def download_articles(parameters):
    """Download the records linked from one search page and store them.

    The URL comes from ``url.prepare_url(parameters)``.  Each entry of the
    resulting ``pages.SearchPage.links`` is loaded as a
    ``pages.BibliographicPage``, converted with ``convert.convert_article``
    and added to a session obtained from ``models.Session()``.  The session
    is committed after every 100th record and once more at the end;
    progress is printed to stdout.

    Args:
        parameters: search options, passed through to ``url.prepare_url``.

    Returns:
        None.
    """
    batch_size = 100
    db_session = models.Session()
    search_url = url.prepare_url(parameters)

    print("Getting %s ..." % search_url)
    listing = pages.SearchPage(search_url)

    # Records are numbered from 1, both for the progress lines and for
    # deciding when a batch is full.
    for number, link in enumerate(listing.links, 1):
        print("%d: Getting %s ..." % (number, link))
        record_page = pages.BibliographicPage(link)
        db_session.add(convert.convert_article(record_page))

        if number % batch_size != 0:
            continue
        db_session.commit()
        print("Saved %d records." % number)

    # Commit whatever is left over from the last, partially filled batch.
    db_session.commit()
    print("Saved.")