示例#1
0
    async def get_pages(self, item):
        """Request the first list page and queue one task per result page.

        ``item.data`` is used as a ``str.format`` template with two positional
        fields: the page number and a random value. The request is sent for
        page 1; on any failure the item is re-queued under ``'get_pages'``.
        On success one ``'get_questions'`` task is queued per page.

        Always returns None.
        """
        template = item.data
        item.data = template.format(1, random.random())
        logging.info('get_pages: ' + item.data)

        def _requeue():
            # Restore the unformatted template before retrying; otherwise the
            # retry would see an already-formatted string and every page built
            # from it below would silently be a copy of page 1.
            item.data = template
            self.add_task('get_pages', item)

        resp = await self.async_web_request(item)
        if resp is None:
            _requeue()
            return None

        html_string = sync_text(resp)
        if not html_string:
            _requeue()
            return None

        js = json.loads(html_string)
        if js['info'] != 'success' or js['status'] != 1:
            # Both values go into format(); passing the JSON dump as a second
            # argument to logging.error left the template one argument short.
            logging.error('[get_pages]: {}\n{}'.format(
                item.data, json.dumps(js, ensure_ascii=False)))
            _requeue()
            return None

        # NOTE(review): js['data'][1] is presumably the total number of
        # results at 20 per page; floor division drops a trailing partial
        # page - confirm against the API before changing.
        page_total = int(js['data'][1]) // 20
        for page_num in range(1, page_total + 1):
            qs_item = Item(
                dict(
                    method='POST',
                    url='http://www.wln100.com/Home/Index/getTestList.html',
                    data=template.format(page_num, random.random()),
                    info=item.info,
                    headers=headers,
                ))
            self.add_task('get_questions', qs_item)
示例#2
0
def make_qs_item(qid):
    """Build the GET request item for the question page of ``qid``.

    A fresh copy of ``HEADERS`` is attached to the item.
    """
    request = {
        'method': 'GET',
        'url': 'http://www.wln100.com/Test/{}.html'.format(qid),
        'max_retry': 3,
        'timeout': 30,
        'headers': dict(HEADERS),
    }
    return Item(request)
示例#3
0
def make_item(qid):
    """Build a GET request item whose URL is ``URL`` formatted with ``qid``."""
    request = {
        'method': 'GET',
        'url': URL.format(qid),
        'max_retry': 2,
        'timeout': 60,
    }
    return Item(request)
示例#4
0
def make_item(url, data, info):
    """Build a POST request item carrying ``data`` and ``info`` for ``url``."""
    request = {
        'method': 'POST',
        'url': url,
        'data': data,
        'info': info,
        'max_retry': 2,
        'timeout': 120,
    }
    return Item(request)
示例#5
0
def make_page_item(info):
    """Build the GET request item for a problem-list page.

    The query string is ``PARAM`` formatted with the keys of ``info``.
    """
    base_url = 'http://www.dz101.com/zujuan/zhishidian/Problems'
    query = PARAM.format(**info)
    request = {
        'method': 'GET',
        'url': base_url + '?' + query,
        'max_retry': 2,
        'timeout': 120,
    }
    return Item(request)
示例#6
0
def make_as_item(qid):
    """Build the POST request item that fetches one test by ``qid``.

    The form body carries a random ``s`` value and the item gets its own
    copy of ``HEADERS``.
    """
    form_body = 'id={}&width=500&s={}'.format(qid, random.random())
    request = {
        'method': 'POST',
        'url': 'http://www.wln100.com/Test/TestPreview/getOneTestById.html',
        'data': form_body,
        'max_retry': 3,
        'timeout': 30,
        'headers': dict(HEADERS),
    }
    return Item(request)
示例#7
0
def make_item(qid):
    """Build a GET request item for ``URL`` formatted with ``qid``.

    ``HEADERS`` is attached as-is (the same object, not a copy).
    """
    request = {
        'method': 'GET',
        'url': URL.format(qid),
        'headers': HEADERS,
        'max_retry': 2,
        'timeout': 20,
    }
    return Item(request)
示例#8
0
def iter_pre_item():
    """Yield one POST request item per subject.

    Each item's ``data`` has the subject id filled in and still contains two
    ``{}`` placeholders (page and rand) for a later ``format`` call. Its
    ``info`` is a dict with the keys ``SubjectID`` and ``aft_subid``.
    """
    # (SubjectID, aft_subid) pairs.
    subject_pairs = (
        ("12", 21),
        ("13", 22),
        ("14", 23),
        ("15", 25),
        ("16", 26),
        ("17", 29),
        ("18", 30),
        ("19", 28),
        ("20", 27),
    )
    # Doubled braces survive the first format() as literal "{}" fields.
    data_template = (
        'sid={}&kid=0&tid=0&dtid=0&dif=0&o=0&page={{}}&sourceid=0&rand={{}}')

    for subject_id, aft_subid in subject_pairs:
        info = {"SubjectID": subject_id, "aft_subid": aft_subid}
        request = {
            'method': 'POST',
            'url': 'http://www.wln100.com/Home/Index/getTestList.html',
            'data': data_template.format(info['SubjectID']),
            'info': info,
            'headers': headers,
        }
        yield Item(request)
示例#9
0
    async def run(self):
        """Seed one ``get_pages`` task per book (currently disabled).

        The method returns None immediately; everything after the first
        statement is unreachable and is kept only so the seeding step can be
        re-enabled by removing that ``return``.
        """
        return None  # already done once; do not run it again

        item = Item(
            dict(
                method='GET',
                url='http://zx.17zuoye.com/teacher/assign/books?grade_id=0&_={}'
                .format(int(time.time() * 1000)),
                headers=headers,
                cookies=self.cookies,
            ))

        while True:
            resp = await self.async_web_request(item)
            js = json.loads(resp)
            #js = json.loads(sync_text(resp))
            if js['error_code'] != 0 or js['success'] != True:
                # Both values must be passed to format(); the template has
                # two fields and would otherwise raise IndexError.
                logging.error('[get_pages]: {}\n{}'.format(
                    item.data, json.dumps(js, ensure_ascii=False)))
                item.proxy = 'http://' + _proxy.get()
                self.stop()
            else:
                break

        for book in js['data']['books']:
            book['subject'] = 2
            item = Item(
                dict(
                    method='POST',
                    url='http://zx.17zuoye.com/teacher/assign/searchQuestions',
                    data='book_id={}&lesson_id={}&page=1'.format(
                        book['_id'], book['series_id']),
                    headers=headers,
                    info=book,
                    cookies=self.cookies,
                    max_retry=2,
                    timeout=10,
                ))
            self.add_task('get_pages', item)
示例#10
0
def make_item(subj, page, _type='xt'):
    """Build the POST request item for one question-list page.

    Args:
        subj: Course id placed in the ``CourseID`` query parameter.
        page: Page number placed in the ``page`` query parameter.
        _type: ``'xt'`` for system questions (default) or ``'qp'`` for
            Quanpin questions.

    Returns:
        An ``Item`` with ``subj``, ``_type`` and ``name`` attributes set;
        ``name`` is ``'<subj>_<_type>_<page>'``.

    Raises:
        ValueError: If ``_type`` is neither ``'xt'`` nor ``'qp'``.
    """
    if _type == 'qp':
        url = ('http://tiku.manfen5.com/zujuan/UserSTListAjax.aspx?'
               'type=getUserST&UnionID=10050&CourseID={}&ZSDZJType='
               '&ZSDZJID=&EndID=0&STTX=&STLeavel=&page={}').format(subj, page)
    elif _type == 'xt':
        url = ('http://tiku.manfen5.com/zujuan/STListAjax.aspx?'
               'type=getST&CourseID={}&ZSDZJType=&ZSDZJID='
               '&EndID=0&STTX=&STLeavel=&IsOnlineTest=&page={}').format(
                   subj, page)
    else:
        # Previously this fell through with ``url`` unbound and failed with
        # an UnboundLocalError further down; fail early with a clear message.
        raise ValueError(
            "unknown _type {!r}; expected 'xt' or 'qp'".format(_type))

    item = Item(dict(
        method='POST',
        url=url,
        max_retry=2,
        timeout=80,
    ))
    item.subj = subj
    item._type = _type
    item.name = '{}_{}_{}'.format(subj, _type, page)
    return item
示例#11
0
 async def run(self):
     """Seed a single ``get_pages`` task (currently disabled).

     Returns None immediately; the statements after the ``return`` are
     unreachable and kept only for reference.
     """
     return None  # use task_queue
     seed_request = {
         'method': 'POST',
         'url': 'http://zx.17zuoye.com/teacher/assign/searchQuestions',
         'data':
         'book_id=BK_20300001489009&lesson_id=BKC_20300076895304&page=1',
         'headers': headers,
         'info': {'subject': 3},
         'cookies': self.cookies,
         'max_retry': 2,
         'timeout': 10,
     }
     self.add_task('get_pages', Item(seed_request))
示例#12
0
    async def get_questions(self, item):
        """Fetch one question-list page, save it and queue answer requests.

        Does nothing once ``self.no_new_question`` exceeds 5. On a failed
        request, empty body or non-success payload the item is re-queued
        under ``'get_questions'``. Otherwise the payload is saved and a
        ``'get_answer'`` task is queued for every question that is not
        archived yet. If the page held no such question,
        ``self.no_new_question`` is incremented.

        Always returns None.
        """
        if self.no_new_question > 5:
            return None

        logging.info('get_questions: ' + item.data)

        resp = await self.async_web_request(item)
        if resp is None:
            self.add_task('get_questions', item)
            return None

        html_string = sync_text(resp)
        if not html_string:
            self.add_task('get_questions', item)
            return None

        js = json.loads(html_string)
        if js['info'] != 'success' or js['status'] != 1:
            # Tag the message with this method's name; it used to say
            # '[get_pages]', which pointed at the wrong stage in the logs.
            logging.error('[get_questions]: {}'.format(item.data))
            self.add_task('get_questions', item)
            return None

        save_question(js, item.info, json.dumps(item.json(),
                                                ensure_ascii=False))

        no_new = True
        for qs in js['data'][0]:
            if is_archived(qs['testid']):
                continue

            no_new = False
            as_item = Item(
                dict(
                    method='POST',
                    url='http://www.wln100.com/Home/Index/getOneTestById.html',
                    data='id={}&width=500&s={}'.format(qs['testid'],
                                                       random.random()),
                    headers=headers,
                    info=item.info,
                    cookies=self.cookies,
                ))
            self.add_task('get_answer', as_item, qs['testid'])

        if no_new:
            self.no_new_question += 1
示例#13
0
    async def get_pages(self, item):
        """Request the first search page and queue one task per result page.

        The item is given a fresh proxy, retry/timeout settings and the
        current cookies before the request. On a failed request or empty
        body it is re-queued under ``'get_pages'`` with another proxy. On a
        non-success payload the account ``self.u`` is additionally flagged
        as blocked in ``en_accounts`` and ``self.login17()`` is called.
        On success a ``'get_questions'`` task is queued for each page number
        from 1 up to (but not including) ``page_count``.

        Always returns None.
        """
        item.proxy = 'http://' + _proxy.get()
        item.max_retry = 2
        item.timeout = 10
        item.cookies = self.cookies
        logging.info('get_pages: ' + item.data)

        resp = await self.async_web_request(item)
        if resp is None:
            item.proxy = 'http://' + _proxy.get()
            self.add_task('get_pages', item)
            return None

        html_string = sync_text(resp)
        if not html_string:
            item.proxy = 'http://' + _proxy.get()
            self.add_task('get_pages', item)
            return None

        js = json.loads(html_string)
        if js['error_code'] != 0 or js['success'] != True:
            # Both values go into format(); with only item.data supplied the
            # two-field template raised IndexError and skipped the recovery
            # steps below.
            logging.error('[get_pages]: {}\n{}'.format(
                item.data, json.dumps(js, ensure_ascii=False)))
            item.proxy = 'http://' + _proxy.get()
            self.add_task('get_pages', item)
            en_accounts[self.u]['block'] = True
            self.login17()
            return None

        # NOTE(review): the range stops before page_count and the book/lesson
        # ids are hard-coded rather than taken from item.info - confirm both
        # are intended.
        for page_num in range(1, int(js['data']['page_count'])):
            qs_item = Item(
                dict(
                    method='POST',
                    url='http://zx.17zuoye.com/teacher/assign/searchQuestions',
                    data=
                    'book_id=BK_20300001489009&lesson_id=BKC_20300076895304&page={}'
                    .format(page_num),
                    headers=headers,
                    cookies=self.cookies,
                    info=item.info,
                    max_retry=2,
                    timeout=10,
                ))
            self.add_task('get_questions', qs_item)
示例#14
0
    async def run(self):
        """Queue a ``get_answer`` task for every question not yet archived.

        Rows come from ``get_question_ids()``. The test id is the first
        column with its first 10 characters removed, and the second column
        is passed along as ``aft_subid``.
        """
        answer_url = 'http://www.wln100.com/Home/Index/getOneTestById.html'
        for record in get_question_ids():
            testid = record[0][10:]
            if not is_archived(testid):
                request = {
                    'method': 'POST',
                    'url': answer_url,
                    'data': 'id={}&width=500&s={}'.format(
                        testid, random.random()),
                    'headers': headers,
                    'info': dict(aft_subid=record[1]),
                    'cookies': self.cookies,
                }
                self.add_task('get_answer', Item(request), testid)