logger = get_logger() url = BASE_URL + 'vol.' + str(num) logger.info('url:%s', url) request = urllib.request.Request(url) request = fill_request(request) html = get_html(request) return html def fill_request(request): for k in HEADERS: request.add_header(k, HEADERS[k]) return request if __name__ == '__main__': logger = get_logger() start_day = datetime.date(2012, 10, 8) today = datetime.date.today() vol = today - start_day + datetime.timedelta(days=2) vol = vol.days logger.info('range from %d to %d', 1, vol) for i in range(1, vol): logger.info('begin get vol %d', i) html = get_one_journal(i) if html is None: logger.error('get html error') else: insert_journal_src(i, html) time.sleep(5)
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
"""Fetch the journal volume for today's date, store its HTML, then extract it."""
import datetime

from loggingutil import get_logger
from gethtml import get_one_journal
from dbutil import insert_journal_src
from extractdata import get_one_journal_data


if __name__ == '__main__':
    start_day = datetime.date(2012, 10, 8)
    today = datetime.date.today()
    # Volume number = whole days elapsed since start_day, plus one
    # (so the volume for start_day itself is 1).
    vol = (today - start_day + datetime.timedelta(days=1)).days

    html = get_one_journal(vol)
    if html is None:
        # NOTE(review): another caller of get_one_journal treats None as a
        # failed download; guard here too so None is never persisted.
        get_logger().error('get html error: vol:%d', vol)
    else:
        res = insert_journal_src(vol, html)
        # Deliberately `== False` rather than `not res`: only an explicit
        # False is treated as a persistence failure, as in the original.
        if res == False:  # noqa: E712
            logger = get_logger()
            logger.error('persist data error: vol:%d', vol)
        else:
            # Extraction runs only once the raw HTML has been stored.
            get_one_journal_data(vol)