Пример #1
0
def chinaUnicomAPI(phone_attr):
    """Crawl China Unicom user and call records for one phone account.

    :param phone_attr: dict(phone=XX, province=XX, city=XX, company=XX,
        password=XX); per the original note the password is an all-digit
        string of at least 6 characters
    :return: the checkAttr() result when it is not True; otherwise a
        returnResult(...) value carrying either the login error code or the
        crawled items
    """
    validation = checkAttr(phone_attr)
    if validation != True:
        # Anything other than True is handed back to the caller unchanged.
        return validation

    crawler = ChinaUnicom(phone_attr)
    login_code = crawler.loginSys()['code']
    if login_code != 2000:
        # Login failed: report the crawler's own status code.
        return returnResult(login_code,
                            data={},
                            desc='spider.loginSys()')

    crawler.getUserInfo()
    crawler.getCallInfo()
    payload = dict(t_operator_user=crawler.user_items,
                   t_operator_call=crawler.call_items,
                   t_operator_note=[])
    return returnResult(2000, data=payload, desc='爬取内容成功')


# end

# import bs4
# bs4.BeautifulSoup.strings
Пример #2
0
def creditReportAPI(name, password, auth_pwd, debug=True):
    """Run the credit-report flow and wrap its outcome with returnResult.

    The three credentials are stripped of surrounding whitespace before use.

    :param name: user name
    :param password: login password
    :param auth_pwd: identity verification code
    :param debug: when equal to True, the saved HTML file is passed to
        debugTest() and its result becomes the returned data
    :return: returnResult(code=2000, data=...) when visitSys() reports 2000,
        otherwise returnResult(code=<reported code>, data={})
    """
    name, password, auth_pwd = (name.strip(), password.strip(),
                                auth_pwd.strip())

    # visitSys() returns a dict; per the original notes a successful run
    # saves the user's credit report as an HTML file.
    outcome = CreditReport(name, password, auth_pwd).visitSys()
    status = outcome['result']
    if status != 2000:
        return returnResult(code=status, data={})

    saved_html = outcome['file_name']
    if debug == True:
        # Parse the saved page and return the parsed result instead.
        outcome = debugTest(name, password, auth_pwd, filename=saved_html)
    return returnResult(code=2000, data=outcome)
Пример #3
0
 def getUniqueTag():
     """Request the unique tag for a month, then fetch that month's records.

     Takes no arguments: ``self`` and ``month`` come from the enclosing
     scope, which is not visible in this snippet.

     :return: the getMonthRecords(unique_tag) result on success;
         returnResult(4100, ...) when extracting the tag (or the follow-up
         call) raises; returnResult(4000, ...) when the request yields a
         falsy response
     """
     form = dict(month=month)
     endpoint = 'http://gd.10086.cn/commodity/servicio/nostandardserv/realtimeListSearch/query.jsps'
     self.__headers['Referer'] = (
         'http://gd.10086.cn/my/REALTIME_LIST_SEARCH.shtml?dt=1469030400000')
     response = Request.basic({
         'method': 'post',
         'url': endpoint,
         'form': form,
         'cookies': self.cookies,
         'headers': self.__headers,
     })
     if not response:
         return returnResult(4000, [], desc=u'getUniqueTag 网络错误')

     try:
         payload = json.loads(response.text)
         return getMonthRecords(payload['attachment'][0]['value'])
     except Exception as ex:
         print 'unique_tag not found, error:', ex
         return returnResult(4100, [], desc=u'getUniqueTag 解析错误')
Пример #4
0
 def trigger_status():
     """Trigger the dynamic verification code and translate the reply.

     :return: returnResult(2000, ...) when the reply type is
         'SUCCESS_COMPLETE'; returnResult(4800, ...) with the extracted
         failure message for any other type; returnResult(4100, ...) when the
         body is not valid JSON; returnResult(4000, ...) for a falsy response
         or a status code other than 200
     """
     response = verification_code_trigger()
     if not response:
         return returnResult(4000, [], desc=u'动态验证码请求网络错误')
     if response.status_code != 200:
         return returnResult(4000, [],
                             desc=u'动态验证码请求: {}'.format(
                                 response.status_code))

     try:
         info = response.json()
     except Exception:
         return returnResult(4100, [], desc=u'动态验证码解析错误')

     if info['type'] == 'SUCCESS_COMPLETE':
         return returnResult(2000, [], desc=info['content'])

     # Failure replies carry the message in one of three places.
     content = info['content']
     if 'failMessage' in content:
         reason = content['failMessage']
     elif 'message' in content[0]:
         reason = content[0]['message']
     else:
         reason = content[0]
     return returnResult(4800, [], desc=reason)
Пример #5
0
        def clawPageCall(date_tuple, page_no=1, resend=2):
            """Fetch one page of call details, retrying when the server is busy.

            ``self`` is read from the enclosing scope (not visible here).

            :param date_tuple: (begin_date, end_date) sent as beginDate/endDate
            :param page_no: page number to request
            :param resend: remaining retries for the "system busy" reply
            :return: the raw response text when the body parses as JSON and
                no retry is due; returnResult(4100, ...) when the body is not
                JSON or the error message lacks 'respCode';
                returnResult(4000, ...) when the request yields a falsy
                response
            """
            params = {'_': getTimestamp(), 'menuid': '000100030001'}
            form = {
                'pageNo': page_no,
                'pageSize': '20',
                'beginDate': date_tuple[0],
                'endDate': date_tuple[1],
            }

            url = 'http://iservice.10010.com/e3/static/query/callDetail'
            self.headers['Referer'] = 'http://iservice.10010.com/' \
                                      'e3/query/call_dan.html?menuId=000100030001'

            options = {
                'method': 'post',
                'url': url,
                'form': form,
                'params': params,
                'cookies': self.cookies,
                'headers': self.headers
            }

            response = Request.basic(options)
            if not response:
                return returnResult(4000, [], desc='clawPageCall')

            try:
                page_json = json.loads(response.text)
            except ValueError:
                return returnResult(4100, [], desc='clawPageCall1')

            if 'errorMessage' in page_json and resend > 0:
                try:
                    busy = (page_json['errorMessage']['respCode'] ==
                            '4114030193')
                except KeyError:
                    return returnResult(4100, [], desc='clawPageCall2')
                if busy:
                    # Server reported "system busy": resend the same request.
                    return clawPageCall(date_tuple, page_no, resend - 1)
                # NOTE(review): any other error code used to fall off the end
                # and return None. It now returns the raw text, exactly as
                # the retries-exhausted path does - confirm callers expect it.
            return response.text
Пример #6
0
def start(**kwargs):
    """Log in and run the phone check for every phone described in ``data``.

    :param kwargs: reads 'data' (passed to getPhonelist) and 'msg_no'
        (stamped onto every entry of the resulting phone list)
    :return: returnResult(code=<login code>, data=None) when
        login_for_crawler() returns a plain dict; otherwise
        returnResult(2000, <checkAPI result>)
    """
    msg_no = kwargs.get("msg_no")
    phone_list = getPhonelist(kwargs.get('data'))
    for entry in phone_list:
        entry["msg_no"] = msg_no

    # A plain dict from the login helper is treated as an error description;
    # anything else is treated as the logged-in browser.
    browser = login_for_crawler()
    if type(browser) is not dict:
        result_data = check(browser).checkAPI(phone_list)
        return returnResult(2000, result_data)
    return returnResult(code=browser["code"], data=None)
Пример #7
0
 def judgeLogin(response):
     """Analyse a login response and map its resultCode to a status dict.

     ``self`` is read from the enclosing scope (not visible here).

     :param response: response object; its ``text`` is searched for
         ``resultCode:"..."``
     :return: dict(code=4000, func='judgeLogin') when no result code can be
         extracted; dict(code=<mapped code>, desc='judgeLogin', data=[]) for
         a known code (the response cookies are merged into self.cookies
         first); returnResult(4000, ...) for an unknown code
     """
     try:
         code = re.search(r'resultCode:"(.*?)"', response.text).group(1)
     except (AttributeError, IndexError):
         # No match, or the response object has no usable text.
         return dict(code=4000, func='judgeLogin')

     code_hash = {
         '0000': 2000,  # flow succeeded
         '7007': 4600,  # wrong password
         '7999': 5500,  # remote server busy
         '7072': 4500,  # wrong account
         '7009': 4500  # wrong account
     }
     if code in code_hash:
         self.cookies.update(dict_from_cookiejar(response.cookies))
         return dict(code=code_hash[code],
                     desc='judgeLogin',
                     data=[])

     # A unicode literal is used because unicode('<non-ASCII bytes>')
     # decodes with the default ASCII codec and raises UnicodeDecodeError.
     return returnResult(4000, [], desc=u'登陆错误代码: {}'.format(code))
Пример #8
0
 def clawInfo(text):
     """Extract the customer's basic information from a page and store it.

     ``self`` is read from the enclosing scope (not visible here). On
     success one record is appended to self.user_items, with every column
     named in config.COLUMN_USER present (missing ones default to '').

     :param text: page text that the regular expressions are run against
     :return: returnResult(2000, ...) on success; returnResult(4100, ...)
         when an AttributeError occurs (e.g. a pattern does not match);
         returnResult(4000, ...) for any other exception
     """
     def first_group(pattern):
         # Raises AttributeError when the pattern is absent from the text.
         return re.search(pattern, text).group(1)

     try:
         print(u'用户状态', text)
         item = dict(
             user_valid=1,
             company=self.phone_attr['company'],
             province=self.phone_attr['province'],
             city=self.phone_attr['city'],
             level=first_group(u"link'\)\.html\('(.*?)'\)"),
             phone=first_group(u'手机号码</td>\\s+<td>(.*?)</td>'),
             name=first_group(u'用户名</td>\\s+<td>(.*?)</td>'),
             cert_num=first_group(u'身份证</td>\\s+<td>(.*?)</td>'),
             open_date=first_group(u'入网时间</td>\\s+<td>(.*?)</td>'),
             product_name=first_group(u'所属品牌</td>\\s+<td>(.*?)</td>'),
             cert_type=u'身份证',
         )
         # Fill in every expected column that was not extracted.
         for column in config.COLUMN_USER:
             item.setdefault(column, '')
         self.user_items.append(item)  # keep the record
         return returnResult(2000, [], desc=u'获取客户信息成功')
     except AttributeError:
         # TODO: confirm the session is actually logged in
         return returnResult(4100, [], desc=u'获取客户信息解析错误')
     except Exception:
         return returnResult(4000, [], desc=u'获取客户信息网络错误')
Пример #9
0
    def fetch_session(self):
        """Request a session and merge its cookies into self.cookies.

        :return: returnResult(2000, ...) once the cookies are merged;
            returnResult(4100, ...) when merging raises;
            returnResult(4000, ...) for a falsy response or a status code
            other than 200
        """
        from requests import cookies

        session_response = self.session_request()
        if not session_response:
            return returnResult(4000, [], desc=u'获取 session 网络错误')

        if session_response.status_code != 200:
            # Must be returned: a bare call here would make the method
            # return None and break callers that index the result.
            return returnResult(4000, [],
                                desc=u'获取 session 网络错误: {}'.format(
                                    session_response.status_code))

        try:
            self.cookies = cookies.merge_cookies(
                self.cookies, session_response.cookies)
        except Exception:
            return returnResult(4100, [], desc=u'获取 session 解析错误')
        return returnResult(2000, [], desc=u'获取 session 成功')
Пример #10
0
 def get_encryption_key(self):
     """Fetch the encryption key pair and adopt the response cookies.

     The "e" and "n" values are pulled from the UTF-8 decoded response body
     with a regular expression.

     :return: the tuple (e_value, n_value) on success - note this is NOT a
         returnResult value; returnResult(4100, ...) when the body does not
         contain both values; returnResult(4000, ...) for a falsy response or
         a status code other than 200
     """
     key_response = self.encryption_key_request()
     if not key_response:
         return returnResult(4000, [], desc=u'获取加密 key 网络错误')

     if key_response.status_code != 200:
         # Must be returned: a bare call here would make the method return
         # None instead of an error result.
         return returnResult(4000, [],
                             desc=u'获取加密 key 网络错误: {}'.format(
                                 key_response.status_code))

     key_match = re.search(u'"e"\:"(.*?)".*?"n"\:"(.*?)"',
                           key_response.content.decode('utf-8'))
     if key_match is None:
         return returnResult(4100, [], desc=u'加密 key 解析错误')

     self.cookies = key_response.cookies
     return key_match.group(1), key_match.group(2)
Пример #11
0
        def getMonthRecords(unique_tag):
            """Fetch the records identified by ``unique_tag``.

            ``self`` is read from the enclosing scope (not visible here).

            :param unique_tag: value posted as the ``uniqueTag`` form field
            :return: returnResult(2000, <response text>, ...) for a truthy
                response, otherwise returnResult(4000, ...)
            """
            endpoint = 'http://gd.10086.cn/commodity/servicio/nostandardserv/realtimeListSearch/ajaxRealQuery.jsps'
            response = Request.basic({
                'method': 'post',
                'url': endpoint,
                'form': dict(uniqueTag=unique_tag, monthListType='0'),
                'cookies': self.cookies,
                'timeout': 20,  # explicit timeout for this request
                'headers': self.__headers,
            })
            if not response:
                return returnResult(4000, [], desc=u'getMonthRecords 网络错误')
            return returnResult(2000,
                                response.text,
                                desc=u'getMonthRecords 成功')
Пример #12
0
 def get_login_url(self, login_response):
     """Interpret the login response and extract the login URL.

     :param login_response: response object of the login request (may be
         falsy when the request failed)
     :return: returnResult(2000, <content>, ...) when the reply type is
         'ucs.server.location.url' (self.cookies is replaced by the response
         cookies); returnResult(4402/4401/4800, ...) with the failure message
         for any other type; returnResult(4100, ...) when the body is not
         valid JSON; returnResult(4000, ...) for a falsy response or a status
         code other than 200
     """
     if not login_response:
         return returnResult(4000, [], desc=u'获取登陆网址网络错误')

     if login_response.status_code != 200:
         # Must be returned: a bare call here would make the method return
         # None instead of an error result.
         return returnResult(4000, [],
                             desc=u'获取登陆网址网络错误: {}'.format(
                                 login_response.status_code))

     try:
         info = login_response.json()
     except Exception:
         return returnResult(4100, [], desc=u'登陆网址解析错误')

     if info['type'] == 'ucs.server.location.url':
         msg = info['content']
         self.cookies = login_response.cookies
         return returnResult(2000, msg, desc=msg)

     # Failure replies carry the message in one of three places.
     content = info['content']
     if 'failMessage' in content:
         msg = content['failMessage']
     elif 'message' in content[0]:
         msg = content[0]['message']
     else:
         msg = content

     # Two known messages get dedicated codes; everything else is 4800.
     code = 4800
     if msg == u'动态密码错误!':
         code = 4402
     elif u'密码错误,请重新输入' in msg:
         code = 4401
     return returnResult(code, [], desc=msg)
Пример #13
0
 def queryInfo():
     """Post the MY_BASICINFO tracking query, then fetch the info itself.

     ``self`` is read from the enclosing scope (not visible here).

     :return: the getInfo() result for a truthy response, otherwise
         returnResult(4000, ...)
     """
     endpoint = 'http://gd.10086.cn/commodity/servicio/track/servicioDcstrack/query.jsps'
     self.__headers['Referer'] = (
         'http://gd.10086.cn/my/myService/myBasicInfo.shtml')
     response = Request.basic({
         'method': 'post',
         'url': endpoint,
         'form': {'servCode': 'MY_BASICINFO'},
         'cookies': self.cookies,
         'headers': self.__headers,
     })
     if not response:
         return returnResult(4000, [], desc=u'queryInfo 网络错误')

     # TODO: auto found
     print(response.text)
     return getInfo()
Пример #14
0
 def getInfo():
     """Query the basic-info data and hand the decoded page to clawInfo.

     ``self`` is read from the enclosing scope (not visible here).

     :return: the clawInfo(...) result for a truthy response (the body is
         decoded as UTF-8 first), otherwise returnResult(4000, ...)
     """
     endpoint = 'http://gd.10086.cn/commodity/servicio/servicioForwarding/queryData.jsps'
     self.__headers['Referer'] = (
         'http://gd.10086.cn/my/myService/myBasicInfo.shtml')
     response = Request.basic({
         'method': 'post',
         'url': endpoint,
         'form': {'servCode': 'MY_BASICINFO', 'operaType': 'QUERY'},
         'cookies': self.cookies,
         'timeout': 30,
         'headers': self.__headers,
     })
     if not response:
         return returnResult(4000, [], desc=u'getInfo 网络错误')
     return clawInfo(response.content.decode('utf-8'))
Пример #15
0
    def clawAllInfo(self):
        """Crawl the user information and then the call records.

        :return: returnResult(4000, ...) when self.cookies equals an empty
            dict; the clawUserInfo() result when its code is not 2000;
            otherwise the clawCallInfo() result
        """
        has_session = self.cookies != {}
        if not has_session:
            return returnResult(4000, [], desc=u'网络错误,cookie 为空')

        user_info = self.clawUserInfo()  # crawl the user information
        if user_info['code'] != 2000:
            return user_info
        return self.clawCallInfo()  # crawl the call records
Пример #16
0
def loginSys(spider):
    """Log in with ``spider`` and crawl everything it can reach.

    :param spider: a ChinaMobile_GD instance
    :return: the fetch_cookie() result when its code is not 2000; the
        clawAllInfo() result when its code is not 2000; otherwise
        returnResult(2000, <dict of user/call/note items>, <crawl desc>)
    :raises ValueError: when ``spider`` is not a ChinaMobile_GD instance
    """
    if not isinstance(spider, ChinaMobile_GD):
        print 'obj error'
        raise ValueError(u'参数错误')

    login = spider.fetch_cookie()
    if login['code'] != 2000:
        # Login failed: pass the login result straight through.
        return login
    print u'登录成功'

    search = spider.clawAllInfo()  # crawl the content
    if search['code'] != 2000:
        return search
    print u'爬取内容成功'

    result = dict(
        t_operator_user=spider.user_items,
        t_operator_call=spider.call_items,
        t_operator_note=spider.note_items,
    )
    return returnResult(2000, result, search['desc'])
Пример #17
0
    def set_login_cookie(self, login_url):
        """Visit the login URL and merge the cookies it sets into self.cookies.

        :param login_url: URL passed to self.login_url_request()
        :return: returnResult(2000, ...) once the cookies are merged;
            returnResult(4100, ...) when the response carries no cookies or
            merging raises; returnResult(4000, ...) for a falsy response or
            a status code other than 302
        """
        from requests import cookies

        login_url_response = self.login_url_request(login_url)
        if not login_url_response:
            return returnResult(4000, [], desc=u'登陆网址网络错误')

        if login_url_response.status_code != 302:
            # Must be returned: a bare call here would make the method
            # return None instead of an error result.
            return returnResult(4000, [],
                                desc=u'登陆网址网络错误: {}'.format(
                                    login_url_response.status_code))

        if not login_url_response.cookies:
            return returnResult(4100, [], desc=u'cookie 获取错误')

        try:
            self.cookies = cookies.merge_cookies(
                self.cookies, login_url_response.cookies)
        except Exception:
            return returnResult(4100, [], desc=u'无法获取 cookie')
        return returnResult(2000, [], desc=u'登陆网址成功')
Пример #18
0
 def clawCallInfo(self):
     """ Save all call records
     :return: null
     """
     item = {
         'cert_num': self.user_items[0]['cert_num'],
         'phone': self.user_items[0]['phone']
     }
     text_seq = self.getFiveMonthCall()
     # if len(text_seq) > 0:
     desc_list = [u'爬取记录成功,但存在未能爬取信息的月份:']
     for text in text_seq:
         if isinstance(text[1], dict):
             desc_list.append(u'{}, 错误原因: {};'.format(
                 unicode(text[0]), unicode(text[1]['desc'])))
         else:
             try:
                 results = json.loads(
                     text[1])['content']['realtimeListSearchRspBean'][
                         'calldetail']['calldetaillist']
                 sms_results = (json.loads(
                     text[1])['content']['realtimeListSearchRspBean']
                                ['smsdetail']['smsdetaillist'])
             except Exception as e:
                 desc_list.append(u'{}, 错误原因: {};'.format(
                     unicode(text[0]), unicode(e.message)))
             for record in results:
                 temp = copy(item)
                 # 'place', 'time', 'time', 'chargefee','period', 'contnum', 'becall', 'conttype'
                 for k, v in record.items():
                     if k in config.KEY_CONVERT_CALL.keys():
                         column_name = config.KEY_CONVERT_CALL[k]
                         temp[column_name] = v
                 try:
                     # 入库修正
                     self.convertValues(temp)
                 except Exception as ex:
                     print ex
                     for k, v in temp.items():
                         print k, v
                 self.call_items.append(temp)
             for record in sms_results:
                 temp = copy(item)
                 # 'time', 'fee', 'smstype', 'smsnum'
                 for k, v in record.items():
                     if k in config.KEY_CONVERT_NOTE_MOBILE.keys():
                         column_name = config.KEY_CONVERT_NOTE_MOBILE[k]
                         temp[column_name] = v
                 try:
                     # 入库修正
                     self.convert_value_note(temp)
                 except Exception as ex:
                     print ex
                     for k, v in temp.items():
                         print k, v
                 self.note_items.append(temp)
     if len(desc_list) == 7:
         return returnResult(4000, [], desc=u'爬取记录网络错误')
     elif 7 > len(desc_list) > 1:
         return returnResult(2000, [], desc=''.join(desc_list))
     elif len(desc_list) == 1:
         return returnResult(2000, [], desc=u'爬取记录成功')
Пример #19
0
def checkAttr(phone_attr):
    """Validate that ``phone_attr`` is a dict with exactly the expected keys.

    :param phone_attr: candidate dict; it must contain the keys phone,
        province, city, company and password, and nothing else
    :return: True when the argument is valid, otherwise
        returnResult(4400, data={})
    """
    required = set(('phone', 'province', 'city', 'company', 'password'))
    if isinstance(phone_attr, dict) and set(phone_attr) == required:
        return True
    return returnResult(4400, data={})