def crawls(qry, to_grab, to_handle):
    '''Fetch a first page, then follow "next" links and gather the results.

    The first request is made with ``to_grab``.  Each later request is made
    with the link extracted from the previous raw response, updated with any
    extra keys left over in the crawl spec.  Crawling stops as soon as the
    rule evaluates falsy for the extracted link, or once ``max`` requests
    (the first one included) have been made.  At least one request is
    always made.

    Args:
        qry: query mapping.  ``qry['crawl']`` is the crawl spec with keys:
            ``next`` (required): handler spec given to ``handles`` to pull
                the next link out of a raw response.
            ``rule`` (optional, default '`true`'): expression given to
                ``search`` together with the extracted link.
            ``max`` (optional, default 10): upper bound on the number of
                requests; must be convertible with ``int()``.
            Any other keys are merged into every follow-up link.
        to_grab: request spec for the first page, passed to ``request``.
        to_handle: handler spec applied to every raw response through
            ``handles``.

    Returns:
        A list of handled results in request order.  A handled result that
        is itself a list is flattened one level into the output; anything
        else is appended as a single item.

    Raises:
        Exception: if the crawl spec has no 'next' entry, or if 'max'
            cannot be converted to an integer.
    '''
    # Work on a copy so popping keys below does not touch the caller's spec.
    to_crawl = copy(qry['crawl'])

    try:
        nextlink = to_crawl.pop('next')
    except (KeyError, TypeError, AttributeError):
        # KeyError: key absent.  TypeError/AttributeError: the crawl spec is
        # not a mapping at all, which is reported the same way as before.
        raise Exception('Crawler needs a \'next\' parameter', [])

    # NOTE(review): the default looks like a JMESPath literal, so `search`
    # is presumably jmespath.search -- confirm against the file's imports.
    rule = to_crawl.pop('rule', '`true`')

    # TODO: the original author intended "no rule and no max" to be an
    # error; today both simply fall back to their defaults.
    try:
        maxiter = int(to_crawl.pop('max', 10))
    except (TypeError, ValueError, OverflowError):
        raise Exception('max crawl val must be an integer!')

    # Whatever is left in the spec is merged into every follow-up link.
    crawl_kwargs = to_crawl

    response = []

    def collect(res):
        # Flatten list results (including list subclasses) one level.
        if isinstance(res, list):
            response.extend(res)
        else:
            response.append(res)

    raw_response = request(to_grab)
    collect(handles(raw_response, to_handle))
    link = handles(raw_response, nextlink)
    valid = search(rule, link)
    iters = 1  # the initial request counts towards `max`

    while valid and iters < maxiter:
        # NOTE(review): `link` must support .update() (presumably a dict);
        # confirm `handles` cannot return None here while the rule matches.
        link.update(crawl_kwargs)
        pprint(link)  # trace of each link being followed
        raw_response = request(link)
        res = handles(raw_response, to_handle)
        link = handles(raw_response, nextlink)
        valid = search(rule, link)
        iters += 1
        collect(res)

    return response
def query(qry):
    '''Execute a query object and return its handled result.

    Recognised keys of ``qry``:
        ``request`` (required): how to grab the data; passed to ``request``
            (or on to ``crawls`` as the first request).
        ``handle`` (optional): how to handle the raw response; passed to
            ``handles``.  ``None`` is passed when the key is absent.
        ``crawl`` (optional): crawl instructions.  When present the whole
            query is delegated to ``crawls``.

    The simplest flow has only ``request``: the request is made once and the
    handled response is returned.

    Args:
        qry: the query mapping described above.  It is copied first and the
            copy is what gets read and forwarded.

    Returns:
        Whatever ``handles`` returns for a plain query, or whatever
        ``crawls`` returns when crawl instructions are present.

    Raises:
        Exception: if ``qry`` has no 'request' entry.
    '''
    qry = copy(qry)
    to_handle = qry.get('handle')

    try:
        to_grab = qry['request']
    except KeyError:
        raise Exception('Query has no request parameters!')

    if 'crawl' in qry:
        return crawls(qry, to_grab, to_handle)

    return handles(request(to_grab), to_handle)