Exemplo n.º 1
0
def download_articles(parameters):
    """Download every article found by the search and add it to the database.

    Iterates over the search-result URLs returned by
    ``url.prepare_urls(parameters)``.  For each search page, every linked
    article is fetched with ``pages.ArticlePage``, converted with
    ``convert.convert_article`` and added to a ``models.Session``.  The
    session is committed once per search page, and progress is printed to
    standard output.

    parameters: container passed on to ``url.prepare_urls``.  If it has a
        ``'start-page'`` entry, that value is the number given to the first
        page in the progress messages; otherwise numbering starts at 1.
    """
    session = models.Session()
    urls = url.prepare_urls(parameters)

    # In this function the page number is only used for progress output.
    first_page = parameters['start-page'] if 'start-page' in parameters else 1

    # Number of articles added to the session so far.  Counting from zero
    # keeps the "Saved N records" message equal to the real total (the
    # previous 1-based counter over-reported by one).
    saved = 0

    for page, search_url in enumerate(urls, first_page):
        print("Getting page %d: %s ..." % (page, search_url))
        search_page = pages.SearchPage(search_url)

        for link in search_page.links:
            # The article being fetched is number saved + 1 (1-based).
            print("%d: Getting %s ..." % (saved + 1, link))
            article_page = pages.ArticlePage(link)
            session.add(convert.convert_article(article_page))
            saved += 1

        # Persist one search page worth of articles at a time.
        session.commit()
        print("Saved %d records; page %d finished." % (saved, page))

    # Every page has already been committed inside the loop; this final
    # commit is kept from the original and has nothing new to write.
    session.commit()
    print("Saved.")
Exemplo n.º 2
0
def download_patents(parameters):
    """Download every patent found by the search and add it to the database.

    Iterates over the search-result URLs returned by
    ``url.prepare_urls(parameters)``.  Each linked patent is fetched with
    ``pages.PatentPage``, converted with ``convert.convert_patent`` and added
    to a ``models.Session``.  The session is committed after every 100th
    patent and once more when all URLs have been processed.  Progress is
    printed to standard output.

    parameters: container passed on to ``url.prepare_urls``.
    """
    db = models.Session()
    fetched = 0  # running 1-based number of the patent being processed

    for search_url in url.prepare_urls(parameters):
        print("Getting %s ..." % search_url)

        for link in pages.SearchPage(search_url).links:
            fetched += 1
            print("%d: Getting %s ..." % (fetched, link))
            db.add(convert.convert_patent(pages.PatentPage(link)))

            # Commit in batches of 100 records.
            if fetched % 100 == 0:
                db.commit()
                print("Saved %d records." % fetched)

    # Commit whatever is left over from the last, possibly partial, batch.
    db.commit()
    print("Saved.")