示例#1
0
def _task_failure(exc):
    ''' build the failure payload ("<ExceptionClass>:<message>") for exc '''
    return {
        'status': 'failure',
        'error': '{}:{}'.format(exc.__class__.__name__, str(exc)),
    }


def _pop_task_for_request():
    ''' GET branch: pop one task for the client named by the "id" query arg '''
    # dict(request.args) may map every key to a list of values; when all
    # values are lists, keep only the first element of each.
    args = dict(request.args)
    if all(isinstance(value, list) for value in args.values()):
        args = {key: value[0] for key, value in args.items()}

    # validate request parameters (skipped)
    # authentication / authorization
    # NOTE: the client is no longer marked as active from this handler.

    popped = TaskQueue.pop(args['id'])
    if popped:
        return {'tasks': [popped], 'count': 1, 'status': 'success'}
    return {'count': 0, 'status': 'success'}


def _push_task_from_request():
    ''' POST branch: queue the JSON body under its "target_id" key '''
    body = request.get_json()
    if body is None:
        raise Exception('No json found')

    # validate request parameters (skipped)
    # authentication / authorization

    # the queue receives its own deep copy of the request body
    payload = copy.deepcopy(body)
    TaskQueue.push(body['target_id'], payload)
    return {'status': 'success'}


def task():
    ''' HTTP handler that pops (GET) or pushes (POST) a task on TaskQueue

    GET  reads the "id" query argument and answers with
         {'tasks': [task], 'count': 1, 'status': 'success'} when a task was
         popped, or {'count': 0, 'status': 'success'} otherwise.
    POST reads a JSON body, pushes a copy of it under body['target_id'] and
         answers with {'status': 'success'}.

    Any exception raised while handling the request is reported as
    {'status': 'failure', 'error': '<ExceptionClass>:<message>'} instead of
    propagating. Methods other than GET/POST get the same failure shape
    rather than an implicit None return value.
    '''
    if request.method == 'GET':
        handler = _pop_task_for_request
    elif request.method == 'POST':
        handler = _push_task_from_request
    else:
        unsupported = ValueError(
            'unsupported method {}'.format(request.method))
        return jsonify(_task_failure(unsupported))

    # Broad catch is deliberate: this is the request boundary and every
    # error is turned into a JSON failure response for the client.
    try:
        msg = handler()
    except Exception as e:
        msg = _task_failure(e)
    return jsonify(msg)
    def test_simple_enqueue_dequeue(self):
        ''' enqueue a single page, check the counters, then dequeue it '''
        q = TaskQueue()
        task = Page('http://www.google.com', 1, 80)
        q.en_queue(task)

        # assertEqual reports both operands on failure, which
        # assertTrue(a == b) does not
        self.assertEqual(q.total_task_cnt, 1)
        self.assertEqual(q.prio_task_cnt[0], 1)
        self.assertEqual(q.prio_task_list[0], [task])

        # the dequeued page must carry the values it was created with
        outtask = q.de_queue()
        self.assertEqual(outtask.depth, 1)
        self.assertEqual(outtask.score, 80)
        self.assertEqual(outtask.url, 'http://www.google.com')

        self.check_empty_queue(q)
  def test_simple_enqueue_dequeue(self):
    ''' enqueue a single page, check the counters, then dequeue it '''
    q = TaskQueue()
    task = Page('http://www.google.com', 1, 80)
    q.en_queue(task)

    # assertEqual reports both operands on failure, which
    # assertTrue(a == b) does not
    self.assertEqual(q.total_task_cnt, 1)
    self.assertEqual(q.prio_task_cnt[0], 1)
    self.assertEqual(q.prio_task_list[0], [task])

    # the dequeued page must carry the values it was created with
    outtask = q.de_queue()
    self.assertEqual(outtask.depth, 1)
    self.assertEqual(outtask.score, 80)
    self.assertEqual(outtask.url, 'http://www.google.com')

    self.check_empty_queue(q)
    def test_bulk_enqueue_dequeue(self):
        ''' enqueue many identical pages, then drain and check each one '''
        q = TaskQueue()
        total = 10000

        for _ in range(total):
            q.en_queue(Page('http://www.nyu.edu/engineering', 2, 60))

        # assertEqual reports both operands on failure, which
        # assertTrue(a == b) does not
        self.assertEqual(q.total_task_cnt, total)
        self.assertEqual(q.prio_task_cnt[0], total)
        self.assertEqual(len(q.prio_task_list[0]), total)

        # the loop relies on de_queue() returning a falsy value once
        # nothing is left
        while True:
            outtask = q.de_queue()
            if not outtask:
                break

            self.assertEqual(outtask.url, 'http://www.nyu.edu/engineering')
            self.assertEqual(outtask.depth, 2)
            self.assertEqual(outtask.score, 60)

        self.check_empty_queue(q)
  def test_bulk_enqueue_dequeue(self):
    ''' enqueue many identical pages, then drain and check each one '''
    q = TaskQueue()
    total = 10000

    for _ in range(total):
      q.en_queue(Page('http://www.nyu.edu/engineering', 2, 60))

    # assertEqual reports both operands on failure, which
    # assertTrue(a == b) does not
    self.assertEqual(q.total_task_cnt, total)
    self.assertEqual(q.prio_task_cnt[0], total)
    self.assertEqual(len(q.prio_task_list[0]), total)

    # the loop relies on de_queue() returning a falsy value once
    # nothing is left
    while True:
      outtask = q.de_queue()
      if not outtask:
        break

      self.assertEqual(outtask.url, 'http://www.nyu.edu/engineering')
      self.assertEqual(outtask.depth, 2)
      self.assertEqual(outtask.score, 60)

    self.check_empty_queue(q)
    def test_page_crawler_init(self):
        ''' test generic page crawler initialization

        Builds a crawler for a byte-string url, the same url as a unicode
        literal, and a url containing a non-ASCII character. The test passes
        when every construction completes without raising.
        '''

        queue = TaskQueue()
        keywords = ['nyu', 'poly']
        urls = [
            'http://www.nyu.edu/engineering',
            u'http://www.nyu.edu/engineering',
            u'http://www.google.com/search?q=♥',
        ]

        # A page is built per url: previously only `url` was reassigned, so
        # every crawler was constructed from the first page and the unicode
        # urls were never exercised.
        for url in urls:
            page = Page(url, depth=1, score=9)
            GenericPageCrawler(page, queue, None, None, keywords, fake=True)
def main():
    ''' main routine: build the shared services and run the dispatcher '''

    # argument passing and config file reading
    settings = Settings()

    # queue service and de-duplicate hash, created in that order
    task_queue = TaskQueue()
    dedupe_cache = DeDupeCache()

    # hand everything to the dispatcher and kick it off
    Dispatcher(task_queue, dedupe_cache, settings).run()
    def test_normalize_url(self):
        ''' test normalize url function

        Both inputs are expected to come back without anything from the
        first '#' onwards.
        '''
        expected = 'http://www.poly.edu/admission/page.html'

        # assertEqual reports both operands on failure, which
        # assertTrue(a == b) does not
        url = 'http://www.poly.edu/admission/page.html#tuition'
        self.assertEqual(vc.normalize_link(url), expected)

        # more than one '#' in the url
        url2 = 'http://www.poly.edu/admission/page.html#tuition#abc'
        self.assertEqual(vc.normalize_link(url2), expected)
    def test_simplify_url(self):
        ''' test simplify url function

        Each case pairs an input link with the exact string that
        vc.simplify_link is expected to return for it. Cases are checked in
        the listed order and the first mismatch fails the test.
        '''
        root = 'http://www.poly.edu'

        # '.' and '..' path segments
        cases = [
            ('http://www.poly.edu/admission/../page.html',
             'http://www.poly.edu/page.html'),
            ('http://www.poly.edu/./page.html',
             'http://www.poly.edu/page.html'),
            ('http://www.poly.edu/../../../../page.html',
             'http://www.poly.edu/page.html'),
            ('http://www.poly.edu/aa/bb/cc/../page.html',
             'http://www.poly.edu/aa/bb/page.html'),
            ('http://www.poly.edu/aa/bb/cc/../../../page.html',
             'http://www.poly.edu/page.html'),
            ('http://www.poly.edu/aa/bb/cc/../../../../page.html',
             'http://www.poly.edu/page.html'),
            ('http://www.poly.edu/./././aa/././././bb/./cc/.././././page.html',
             'http://www.poly.edu/aa/bb/page.html'),
        ]

        # index pages at the site root are expected to collapse to the root
        index_pages = [
            'http://www.poly.edu/index.html',
            'http://www.poly.edu/index.htm',
            'http://www.poly.edu/index.jsp',
            'http://www.poly.edu/index.asp',
            'http://www.poly.edu/index.aspx',
            'http://www.poly.edu/index.php',
        ]
        cases.extend((link, root) for link in index_pages)

        # '..' segments combined with a trailing index page
        cases.append(('http://www.poly.edu/a/../../b/index.html',
                      'http://www.poly.edu/b'))

        # assertEqual reports the actual and expected link on failure
        for link, expected in cases:
            self.assertEqual(vc.simplify_link(link), expected)
示例#10
0
def main():
    ''' fill a TaskQueue with three stages and pass its update to socketRun

    The stages are appended in this order: KalmanFilter, GetDistance, then
    a DrawSingle configured for the 'distance' key with y limits 0..10.
    socketRun is given task_queue.update and port 8070.
    '''
    task_queue = TaskQueue()

    # every stage is constructed before any of them is appended
    stages = (
        KalmanFilter(),
        GetDistance(),
        DrawSingle(ylim_min=0, ylim_max=10, key='distance'),
    )
    for stage in stages:
        task_queue.append(stage)

    socketRun(task_queue.update, port=8070)
 def test_init(self):
     ''' a freshly constructed queue must pass check_empty_queue '''
     self.check_empty_queue(TaskQueue())