示例#1
0
    def visit_article(self):
        """Fetch one page (20 items) of a Toutiao user's article feed as JSON.

        Uses the shared ``self.session`` so cookies persist; the ``as``/``cp``/
        ``_signature`` values are hard-coded captures for this user id.
        """
        behot_time = get_13_time()
        feed_url = 'https://www.toutiao.com/c/user/article/'
        request_headers = {'Referer': 'https://www.toutiao.com/c/user/56654489607/'}
        query = {
            'page_type': '1',
            'user_id': '56654489607',
            'max_behot_time': behot_time,
            'count': '20',
            'as': 'A105DCF003A4247',
            'cp': '5C035412A4A72E1',
            '_signature': 'aSGH2hAYNSY-Fs-BeDJWyWkhh8',
        }
        resp = self.session.get(feed_url, headers=request_headers, params=query)
        json_info = resp.json()

        print()
示例#2
0
 def parse_list(self, response):
     """Yield requests for every page (0-94) of a company's job-list API.

     Expects ``response.meta`` to carry ``companyId``, ``ticker`` and
     ``company_name``; the same meta dict is forwarded to ``parse_job``.
     """
     next_meta = response.meta
     company_id = next_meta['companyId'].strip()
     # Headers and cookies are identical for every page — build them once
     # instead of rebuilding both dicts on all 95 iterations.
     headers = {
         'referer': 'https://www.liepin.com/ajaxproxy.html'
     }
     cookies = {
         '__uuid': '1550017147980.22',
         '_uuid': 'E4361B46FFA8441973EC46E6488BD983',
         'is_lp_user': '******',
         'need_bind_tel': 'false',
         'new_user': '******',
         'c_flag': 'f57e19ed294147b87179e4e6132477f5',
         'imClientId': '45e417dd37f82ac674cdcbb355984626',
         'imId': '45e417dd37f82ac6a36687782a0c1c67',
         'imClientId_0': '45e417dd37f82ac674cdcbb355984626',
         'imId_0': '45e417dd37f82ac6a36687782a0c1c67',
         'gr_user_id': '374534ce-aa54-4880-88ca-7a7bb7adf340',
         'bad1b2d9162fab1f80dde1897f7a2972_gr_last_sent_cs1': '463d81f04fd219c61a667e00ad0d9493',
         'grwng_uid': 'f3fda8f8-0c2e-4f29-8507-f42f7a9671ec',
         'fe_work_exp_add': 'true',
         'ADHOC_MEMBERSHIP_CLIENT_ID1.0': 'fa804ff0-2a02-3f31-8dcb-8e13b527dfcb',
         'bad1b2d9162fab1f80dde1897f7a2972_gr_cs1': '463d81f04fd219c61a667e00ad0d9493',
         '__tlog': '1550383052778.97%7C00000000%7C00000000%7C00000000%7C00000000',
         '_mscid': '00000000',
         'Hm_lvt_a2647413544f5a04f00da7eee0d5e200': '1550233873,1550279247,1550281552,1550383053',
         'abtest': '0',
         '_fecdn_': '0',
         '__session_seq': '2',
         '__uv_seq': '2',
         'Hm_lpvt_a2647413544f5a04f00da7eee0d5e200': '1550383074'
     }
     # range(95) replaces the manual while/n+=1 counter; the original also
     # contained no-op self-assignments (next_meta['x'] = next_meta['x'])
     # which are dropped — the debug prints are kept.
     for page in range(95):
         t = get_13_time()
         url = f'https://www.liepin.com/company/sojob.json?pageSize=15&curPage={page}&ecompIds={company_id}&dq=&publishTime=&keywords=&_={t}'
         print(next_meta['ticker'])
         print(next_meta['company_name'])
         yield scrapy.Request(url, callback=self.parse_job, meta=next_meta, headers=headers, cookies=cookies)
示例#3
0
    def _visit_queryOrderWaitTime(self):
        """Poll 12306 for order status; return True once a ticket is issued.

        Prints the expected wait time and returns False while the order id is
        still empty.
        """
        query = {
            'random': get_13_time(),
            'tourFlag': 'dc',
            '_json_att': '',
            'REPEAT_SUBMIT_TOKEN': self.globalRepeatSubmitToken,
        }
        response = self.session.get(
            'https://kyfw.12306.cn/otn/confirmPassenger/queryOrderWaitTime',
            params=query,
        )
        payload = response.json()

        order_id = payload['data']['orderId']

        if not order_id:
            print('queryOrderWaitTime 还没有出票, 预计等待时间:', payload['data']['waitTime'])
            return False
        print('queryOrderWaitTime 出票成功, id:', order_id)
        return True
示例#4
0
    def _visit_captcha_check(self):
        """Submit the captcha answer to 12306; cache it and return True on success.

        Result code '4' means the answer was accepted; on success the answer is
        stored in ``self.vcode_answer`` for the subsequent login request.
        """
        answer = self._get_vcode()
        query = {
            'answer': answer,
            'rand': 'sjrand',
            'login_site': 'E',
            '_': get_13_time(),
        }
        resp = self.session.get(
            'https://kyfw.12306.cn/passport/captcha/captcha-check',
            params=query,
        )

        # May raise a JSON decode error if the body is not valid JSON.
        result = resp.json()

        if result['result_code'] != '4':
            print('验证码失败:', self.vcode)
            return False
        print('验证码成功')
        self.vcode_answer = answer
        return True
示例#5
0
    def _visit_captcha_image64(self):
        """Download the login captcha and return the decoded image bytes."""
        import base64

        t = get_13_time()
        query = {
            'login_site': 'E',
            'module': 'login',
            'rand': 'sjrand',
            # The raw timestamp is sent as a bare query key with an empty value.
            t: '',
            '_': str(int(t) - random.randint(1800, 2500)),
        }
        # Must go through self.session so cookies carry over to the login flow.
        resp = self.session.get(
            'https://kyfw.12306.cn/passport/captcha/captcha-image64',
            params=query,
        )
        payload = resp.json()
        # The captcha arrives base64-encoded inside the JSON body.
        return base64.b64decode(payload['image'])
示例#6
0
import json

import scrapy
import re
from datetime import datetime
import pandas as pd
import time
from common.util import get_13_time
t = get_13_time()  # NOTE(review): import-time side effect wedged between import statements; this module-level `t` appears unused (methods call get_13_time() locally) — confirm before removing
from liepinSpecialComJob.items import LiepinspecialcomjobItem


class LiepinSpdier(scrapy.Spider):
    name = 'liepin'
    start_urls = ['https://vip.liepin.com/883905/1405577359643.shtml',
                  'https://vip.liepin.com/8161070/joblist.shtml',
                  # 'http://maker.haier.net/custompage/socialchannel/index.html?platformcode=lp',
                  'https://vip.liepin.com/7855333/joblist.shtml',
                  'https://vip.liepin.com/8090130/1409730340536.shtml',
                  'https://vip.liepin.com/8399212/joblist.shtml',
                  'https://vip.liepin.com/1198424/joblist2.shtml',
                  'https://vip.liepin.com/8787971/joblist.shtml',
                  'https://vip.liepin.com/8796178/joblist2.shtml',
                  'https://vip.liepin.com/8091337/1426475303042.shtml',
                  'https://vip.liepin.com/7904788/job.shtml',
                    ]

    def parse(self, response):
        text = response.text
        company_name = re.search(r'<title>(.*?) - 猎聘网招聘官网',text).group(1)
        companyId=re.search(r'CONFIG={"companyId":"([0-9]+)"}',text).group(1)