示例#1
0
文件: online.py 项目: JH27/crawlers
def update(a):
    """Re-run the detail and PDF crawl steps for ``a``.

    Calls ``specific.get_html``, ``specific.html2json`` and ``pdf.get_pdf``
    in that order, each with ``a`` and the same ``range`` tuple.

    ``a`` is forwarded untouched to every step (presumably an assembly
    id -- confirm against the caller).  ``bill_s`` and ``bill_e`` are not
    defined in this function; they are expected to be module-level names.
    """
    # Same bounds for all three steps, so build the tuple once.
    bill_range = (bill_s, bill_e)

    # print(...) with a single argument prints the same text on Python 2.7
    # and is also valid syntax on Python 3.
    print('## Get specific data')
    specific.get_html(a, range=bill_range)
    specific.html2json(a, range=bill_range)

    print('## Get pdfs')
    pdf.get_pdf(a, range=bill_range)
示例#2
0
def update(a):
    """Fetch detail pages, convert them to JSON, then fetch PDFs for ``a``.

    Every step receives ``a`` plus ``range=(bill_s, bill_e)``.  ``bill_s``
    and ``bill_e`` are read from the enclosing module scope (they are not
    defined here).  NOTE(review): ``a`` looks like an assembly id --
    confirm against the caller.
    """
    # The bounds are identical for each call; compute them a single time.
    bill_range = (bill_s, bill_e)

    # Parenthesised single-argument print works on both Python 2.7 and 3.
    print('## Get specific data')
    specific.get_html(a, range=bill_range)
    specific.html2json(a, range=bill_range)

    print('## Get pdfs')
    pdf.get_pdf(a, range=bill_range)
示例#3
0
文件: online.py 项目: JH27/crawlers
def get_new(a):
    """Crawl the bills newly reported for ``a``.

    Steps, in order:

    1. ``fetch_new_bill_ids(a)`` supplies the ids to process.
    2. The ids are pushed to the ``'insert_bills_db'`` queue.
    3. ``specific.get_html`` and ``specific.html2json`` run for those ids.
    4. ``pdf.get_pdf`` runs for those ids.

    ``a`` is forwarded as-is to every helper (presumably an assembly id --
    confirm against the caller).
    """
    # print(...) with one argument is valid and equivalent on Python 2.7
    # and Python 3, unlike the bare print statement.
    print('## Get meta data')
    new_bill_ids = fetch_new_bill_ids(a)

    push_to_queue('insert_bills_db', new_bill_ids)

    print('## Get specific data')
    specific.get_html(a, bill_ids=new_bill_ids)
    specific.html2json(a, bill_ids=new_bill_ids)

    print('## Get pdfs')
    pdf.get_pdf(a, bill_ids=new_bill_ids)
示例#4
0
def get_new(a):
    """Process only the bill ids returned by ``fetch_new_bill_ids(a)``.

    The ids are first pushed to the ``'insert_bills_db'`` queue, then
    handed (as ``bill_ids``) to ``specific.get_html``,
    ``specific.html2json`` and ``pdf.get_pdf``, in that order.

    NOTE(review): ``a`` is assumed to identify an assembly; this snippet
    only forwards it, so verify with the caller.
    """
    # Single-argument print(...) keeps the output unchanged on Python 2.7
    # while remaining valid syntax on Python 3.
    print('## Get meta data')
    new_bill_ids = fetch_new_bill_ids(a)

    push_to_queue('insert_bills_db', new_bill_ids)

    print('## Get specific data')
    specific.get_html(a, bill_ids=new_bill_ids)
    specific.html2json(a, bill_ids=new_bill_ids)

    print('## Get pdfs')
    pdf.get_pdf(a, bill_ids=new_bill_ids)
示例#5
0
def get_new(a):
    """Crawl the bills newly reported for ``a`` and announce them on every queue.

    The ids returned by ``fetch_new_bill_ids(a)`` are pushed to each queue
    named in ``QUEUE_NAMES`` and then passed (as ``bill_ids``) to
    ``specific.get_html``, ``specific.html2json`` and ``pdf.get_pdf``.

    ``a`` is forwarded untouched to the helpers (presumably an assembly
    id -- confirm against the caller).  ``QUEUE_NAMES`` is assumed to be a
    module-level dict whose values are queue names.
    """
    # print(...) with one argument behaves the same on Python 2.7 and 3.
    print('## Get meta data')
    new_bill_ids = fetch_new_bill_ids(a)

    # values() is available on both Python 2 and 3 dicts; itervalues()
    # exists only on Python 2.
    for queue_name in QUEUE_NAMES.values():
        push_to_queue(queue_name, new_bill_ids)

    print('## Get specific data')
    specific.get_html(a, bill_ids=new_bill_ids)
    specific.html2json(a, bill_ids=new_bill_ids)

    print('## Get pdfs')
    pdf.get_pdf(a, bill_ids=new_bill_ids)
示例#6
0
文件: main.py 项目: dongx3/crawlers
#! /usr/bin/python2.7
# -*- coding: utf-8 -*-

import meta
import specific
import pdf

# Inclusive bounds of the values iterated below (assemblies 17 through 19).
assembly_s, assembly_e = 17, 19
# Bounds forwarded as ``range=`` to the specific/pdf steps; both are None here.
bill_s, bill_e = None, None

# Loop-invariant, so build the tuple once instead of on every call.
bill_range = (bill_s, bill_e)

for a in range(assembly_s, assembly_e+1):
    # Single-argument print(...) yields the same output on Python 2.7 and
    # is also valid on Python 3.
    print('\n# Assembly %d' % a)

    # Meta step: the page count from get_npages feeds both later meta calls.
    print('## Get meta data')
    npages = meta.get_npages(a)
    meta.get_html(a, npages)
    meta.html2csv(a, npages)

    print('## Get specific data')
    specific.get_html(a, range=bill_range)
    specific.html2json(a, range=bill_range)

    print('## Get pdfs')
    pdf.get_pdf(a, range=bill_range)
示例#7
0
#! /usr/bin/python2.7
# -*- coding: utf-8 -*-

import meta
import specific
import pdf

assembly_s, assembly_e = 17, 19  # start, end id of assembly (end inclusive)
bill_s, bill_e = None, None      # start, end number of bill

# The bill bounds never change inside the loop; compute the tuple once.
bill_range = (bill_s, bill_e)

for a in range(assembly_s, assembly_e+1):
    # print(...) with one argument is equivalent on Python 2.7 and valid
    # on Python 3, unlike the bare print statement.
    print('\n# Assembly %d' % a)

    # The page count is fetched first and reused by both meta calls.
    print('## Get meta data')
    npages = meta.get_npages(a)
    meta.get_html(a, npages)
    meta.html2csv(a, npages)

    print('## Get specific data')
    specific.get_html(a, range=bill_range)
    specific.html2json(a, range=bill_range)

    print('## Get pdfs')
    pdf.get_pdf(a, range=bill_range)