Exemplo n.º 1
0
def tasks(hour):
    """Schedule the crawler job and keep the process running for it.

    Registers ``douban_spider_task`` as a cron job on a standalone
    scheduler and then starts that scheduler.

    Args:
        hour: Value for the cron ``hour`` field; it is passed through
            ``int()``, so numeric strings are accepted.

    Raises:
        ValueError: If ``hour`` cannot be converted by ``int()``.
    """
    sched = Scheduler(standalone=True)
    # http://pythonhosted.org/APScheduler/modules/scheduler.html
    sched.add_cron_job(douban_spider_task, hour=int(hour))

    # Record the start-up time BEFORE starting the scheduler: in standalone
    # mode start() blocks the calling thread, so anything placed after it
    # would only run once the scheduler has stopped.
    info = 'start tasks at ' + datetime.datetime.now().strftime('%y-%m-%d %H:%M:%S')
    spider_task_log.log_info(info)

    sched.start()
Exemplo n.º 2
0
def douban_spider_task():
    """Run one crawl pass: log in, refresh channels, then refresh music.

    Each stage is reported through ``spider_task_log.log_info``; the
    per-channel music summary is additionally printed to stdout.
    """
    # Stage 1: mark the start of the run.
    spider_task_log.log_info(
        u'tasks.spider_task.douban_spider_task: start %s' % datetime.datetime.now())

    # Stage 2: authenticate, then record when that completed.
    login()
    spider_task_log.log_info(
        u'tasks.spider_task.douban_spider_task: login success %s' % datetime.datetime.now())

    # Stage 3: refresh the channel list and log every entry returned.
    updated_channels = update_channel_list()
    for updated_name in updated_channels:
        spider_task_log.log_info(
            u'tasks.spider_task.douban_spider_task: updated_channel %s' % updated_name)

    # Stage 4: refresh the music of each channel, using the channel's own
    # ``update_num`` as the second argument to the updater.
    for chan in get_channel():
        refreshed = update_music_by_channel(chan, chan.update_num)
        summary = u'tasks.spider_task.douban_spider_task: channel %s, updated_num %d' % (
            chan, len(refreshed))
        print(summary)
        spider_task_log.log_info(summary)

    # Stage 5: mark the end of the run.
    spider_task_log.log_info(
        u'tasks.spider_task.douban_spider_task: end %s' % datetime.datetime.now())