def task():
    if request.method == 'GET':
        try:
            # get request parameters
            req = dict(request.args)
            if all(isinstance(v, list) for v in req.values()):
                req = {k: v[0] for k, v in req.items()}
            # validate request parameters (skipped)
            # authentication / authorization
            # mark the client as active (not doing that here anymore) -- remove soon
            # Tracker.add_active_clients([req['id']])
            # process request
            task = TaskQueue.pop(req['id'])
            if task:
                msg = {'tasks': [task], 'count': 1, 'status': 'success'}
            else:
                msg = {'count': 0, 'status': 'success'}
        except Exception as e:
            msg = {'status': 'failure',
                   'error': '{}:{}'.format(e.__class__.__name__, str(e))}
        return jsonify(msg)

    if request.method == 'POST':
        try:
            # get json parameters
            req = request.get_json()
            if req is None:
                raise Exception('No json found')
            # validate request parameters (skipped)
            # authentication / authorization
            # process request
            task = copy.deepcopy(req)
            TaskQueue.push(req['target_id'], task)
            msg = {'status': 'success'}
        except Exception as e:
            msg = {'status': 'failure',
                   'error': '{}:{}'.format(e.__class__.__name__, str(e))}
        return jsonify(msg)
def test_simple_enqueue_dequeue(self):
    q = TaskQueue()
    task = Page('http://www.google.com', 1, 80)
    q.en_queue(task)
    self.assertTrue(q.total_task_cnt == 1)
    self.assertTrue(q.prio_task_cnt[0] == 1)
    self.assertTrue(q.prio_task_list[0] == [task])
    outtask = q.de_queue()
    self.assertTrue(outtask.depth == 1)
    self.assertTrue(outtask.score == 80)
    self.assertTrue(outtask.url == 'http://www.google.com')
    self.check_empty_queue(q)
def test_bulk_enqueue_dequeue(self):
    q = TaskQueue()
    for cnt in range(10000):
        task = Page('http://www.nyu.edu/engineering', 2, 60)
        q.en_queue(task)
    self.assertTrue(q.total_task_cnt == 10000)
    self.assertTrue(q.prio_task_cnt[0] == 10000)
    self.assertTrue(len(q.prio_task_list[0]) == 10000)
    while True:
        outtask = q.de_queue()
        if not outtask:
            break
        self.assertTrue(outtask.url == 'http://www.nyu.edu/engineering')
        self.assertTrue(outtask.depth == 2)
        self.assertTrue(outtask.score == 60)
    self.check_empty_queue(q)
def test_page_crawler_init(self):
    ''' test generic page crawler initialization '''
    queue = TaskQueue()
    keywords = ['nyu', 'poly']

    # plain str URL
    url = 'http://www.nyu.edu/engineering'
    page = Page(url, depth=1, score=9)
    cr = GenericPageCrawler(page, queue, None, None, keywords, fake=True)

    # unicode URL
    url = u'http://www.nyu.edu/engineering'
    page = Page(url, depth=1, score=9)
    cr = GenericPageCrawler(page, queue, None, None, keywords, fake=True)

    # URL containing a non-ASCII character
    url = u'http://www.google.com/search?q=♥'
    page = Page(url, depth=1, score=9)
    cr = GenericPageCrawler(page, queue, None, None, keywords, fake=True)
def main():
    ''' main routine '''
    # argument parsing and config file reading
    st = Settings()
    # start queue service
    qs = TaskQueue()
    # start de-duplication hash
    cc = DeDupeCache()
    # kick off the dispatcher
    dp = Dispatcher(qs, cc, st)
    dp.run()
def test_normalize_url(self):
    ''' test URL normalization (fragment stripping) '''
    url = 'http://www.poly.edu/admission/page.html#tuition'
    self.assertTrue(
        vc.normalize_link(url) == 'http://www.poly.edu/admission/page.html')
    url2 = 'http://www.poly.edu/admission/page.html#tuition#abc'
    self.assertTrue(
        vc.normalize_link(url2) == 'http://www.poly.edu/admission/page.html')
def test_simplify_url(self):
    ''' test URL path simplification ('.' and '..' resolution) '''
    url = 'http://www.poly.edu/admission/../page.html'
    self.assertTrue(
        vc.simplify_link(url) == 'http://www.poly.edu/page.html')
    url2 = 'http://www.poly.edu/./page.html'
    self.assertTrue(
        vc.simplify_link(url2) == 'http://www.poly.edu/page.html')
    url3 = 'http://www.poly.edu/../../../../page.html'
    self.assertTrue(
        vc.simplify_link(url3) == 'http://www.poly.edu/page.html')
    url4 = 'http://www.poly.edu/aa/bb/cc/../page.html'
    self.assertTrue(
        vc.simplify_link(url4) == 'http://www.poly.edu/aa/bb/page.html')
    url5 = 'http://www.poly.edu/aa/bb/cc/../../../page.html'
    self.assertTrue(
        vc.simplify_link(url5) == 'http://www.poly.edu/page.html')
    url6 = 'http://www.poly.edu/aa/bb/cc/../../../../page.html'
    self.assertTrue(
        vc.simplify_link(url6) == 'http://www.poly.edu/page.html')
    url7 = 'http://www.poly.edu/./././aa/././././bb/./cc/.././././page.html'
    self.assertTrue(
        vc.simplify_link(url7) == 'http://www.poly.edu/aa/bb/page.html')
    # default index pages collapse to the site root
    url8 = [
        'http://www.poly.edu/index.html',
        'http://www.poly.edu/index.htm',
        'http://www.poly.edu/index.jsp',
        'http://www.poly.edu/index.asp',
        'http://www.poly.edu/index.aspx',
        'http://www.poly.edu/index.php',
    ]
    for url in url8:
        self.assertTrue(vc.simplify_link(url) == 'http://www.poly.edu')
    url9 = 'http://www.poly.edu/a/../../b/index.html'
    self.assertTrue(vc.simplify_link(url9) == 'http://www.poly.edu/b')
def main():
    # build the processing chain: Kalman filter -> distance -> live plot
    task_queue = TaskQueue()
    df = KalmanFilter()
    dd = GetDistance()
    dw = DrawSingle(ylim_min=0, ylim_max=10, key='distance')
    task_queue.append(df)
    task_queue.append(dd)
    task_queue.append(dw)
    # drive the queue's update callback from the socket server
    socketRun(task_queue.update, port=8070)
def test_init(self):
    q = TaskQueue()
    self.check_empty_queue(q)