def init_user_prof(host, job_name='user_prof'):
    """Initialise the ``job_name`` job pool from the scanned review pages.

    A ``Scanner`` is built from the ``uid.json`` path under ``cache_root`` and
    ``uid_ptn`` and run over ``page_path['reviews']``. Every value found in
    ``scanner.data`` becomes a key to process (presumably user ids -- confirm
    against ``uid_ptn``). Keys already handled are derived from the file names
    in ``page_path[job_name]``: the text before the first ``'_'``.

    Args:
        host: Passed through unchanged to ``JobPool``.
        job_name: Job pool name; also the ``page_path`` key of the directory
            listed to find finished work.

    Returns:
        None. The outcome is the side effect of ``JobPool.init(total, done)``.
    """
    scanner = Scanner(join(cache_root, 'uid.json'), uid_ptn)
    scanner.scan(page_path['reviews'])

    # Flatten the per-entry value collections into one set of keys.
    total = {key for keys in scanner.data.values() for key in keys}

    # partition() keeps a name that has no '_' intact. Slicing with
    # str.find() would use -1 there and silently drop the last character.
    done = {
        name.partition('_')[0]
        for name in os.listdir(page_path[job_name])
    }

    job = JobPool(job_name, host, db=db)
    job.init(total, done)
# NOTE(review): the line breaks of several statements were lost; everything
# below sits on ONE physical line and is not valid Python until re-split.
# In order, the line holds:
#   1. `return request_pages(...)` -- the final statement of a definition
#      whose header lies outside this view.
#   2. `init_user_prof(host, job_name='user_prof')` -- builds a Scanner from
#      the 'uid.json' path under cache_root and uid_ptn, scans
#      page_path['reviews'], flattens the values of s.data into `total`,
#      takes the part of each file name in page_path[job_name] before the
#      first '_' as `done`, then calls
#      JobPool(job_name, host, db=db).init(total, done).
#      NOTE(review): str.find returns -1 for a name without '_', so such a
#      name loses its last character in `done`.
#   3. The `__main__` guard -- a triple-quoted string holding unexecuted
#      Python 2 scratch calls, followed by
#      JobPool('user_prof', 'jackon.me', db=db).run(grab_user_prof).
return request_pages(key, range(1, max_page), url, find_rev, filename_ptn=filename_ptn) def init_user_prof(host, job_name='user_prof'): s = Scanner(join(cache_root, 'uid.json'), uid_ptn) s.scan(page_path['reviews']) total = {v for vs in s.data.values() for v in vs} done = {fn[:fn.find('_')] for fn in os.listdir(page_path[job_name])} job = JobPool(job_name, host, db=db) job.init(total, done) if __name__ == '__main__': """ from test.test_tools import request, request_pages uid = '3601131' print grab_user_prof(uid) sid = '5195730' # 45 reviews on 2015.8.3 print grab_shop_prof(sid) print grab_reviews(sid) """ job = JobPool('user_prof', 'jackon.me', db=db) job.run(grab_user_prof)
# NOTE(review): the line breaks of several statements were lost; everything
# below sits on ONE physical line. As written, the inline `# key, page`
# comment turns the remainder of the line into a comment, so only the
# `filename_ptn` assignment is live code until the line is re-split.
# In order, the line holds:
#   1. `filename_ptn = join(page_path['reviews'], '{}_{}.html')` and
#      `return request_pages(...)` -- the last two statements of a definition
#      whose header lies outside this view; the two '{}' slots are labelled
#      "key, page" by the original comment.
#   2. `init_user_prof(host, job_name='user_prof')` -- builds a Scanner from
#      the 'uid.json' path under cache_root and uid_ptn, scans
#      page_path['reviews'], flattens the values of s.data into `total`,
#      takes the part of each file name in page_path[job_name] before the
#      first '_' as `done`, then calls
#      JobPool(job_name, host, db=db).init(total, done).
#      NOTE(review): str.find returns -1 for a name without '_', so such a
#      name loses its last character in `done`.
#   3. The `__main__` guard -- a triple-quoted string holding unexecuted
#      Python 2 scratch calls, followed by
#      JobPool('user_prof', 'jackon.me', db=db).run(grab_user_prof).
filename_ptn = join(page_path['reviews'], '{}_{}.html') # key, page return request_pages(key, range(1, max_page), url, find_rev, filename_ptn=filename_ptn) def init_user_prof(host, job_name='user_prof'): s = Scanner(join(cache_root, 'uid.json'), uid_ptn) s.scan(page_path['reviews']) total = {v for vs in s.data.values() for v in vs} done = {fn[:fn.find('_')] for fn in os.listdir(page_path[job_name])} job = JobPool(job_name, host, db=db) job.init(total, done) if __name__ == '__main__': """ from test.test_tools import request, request_pages uid = '3601131' print grab_user_prof(uid) sid = '5195730' # 45 reviews on 2015.8.3 print grab_shop_prof(sid) print grab_reviews(sid) """ job = JobPool('user_prof', 'jackon.me', db=db) job.run(grab_user_prof)