Python get_in示例，text.get_in Python示例

示例#1

0

显示文件

文件： msn_post.py 项目： waitingzeng/ttwait-code

    def _parse_contact_list(self, page):
        """Extract contact records from one contact-list page.

        The page is expected to embed a JSON object assigned to
        ``window.ic_control_data``.  Entries are looked up under the keys
        ``ic2`` .. ``ic1a`` (``'ic%x' % i`` for 2 <= i < 27) and indexed
        positionally, following the page's ControlDataIndex:

            cid:0, showmenu:1, menudefault:2, name:3, contactid:4,
            deccid:5, address:6, membername:7, additionalchickletdata:8,
            thirdpartyname:9, attachmenutobody:10, placedintable:11,
            menucustom:12, tileitemid:13, nameitemid:14, psmitemid:15,
            badgeitemid:16, actiontypeurloverride:17, childicid:18

        When ``self.cid`` is not set yet, it is filled from the page and
        ``self.mailhost`` is filled from ``self.web.url``.

        :param page: text of the contact page, or None.
        :returns: list of ``[cid, contactid, deccid, address, name]`` lists;
            an empty list when ``page`` is None.
        """
        if page is None:
            return []
        if not self.cid:
            self.cid = get_in(page, '"vcid":"', '"')
            self.mailhost = get_in(self.web.url, '//', '/mail/')
        cids = []
        try:
            datatxt = get_in(page, 'window.ic_control_data = ', ';')
            data = json.loads(datatxt)
            for i in range(2, 27):
                key = 'ic%x' % i
                if key in data:
                    entry = data[key]
                    cids.append([
                        entry[0], entry[4], entry[5], entry[6], entry[3]
                    ])
        except Exception as info:
            # Any failure while locating/parsing the data is treated as
            # fatal: it is logged and the process exits.
            self.error(info)
            sys.exit()
        # Callers extend a running list with this result, so it must be
        # returned explicitly.
        return cids

示例#2

0

显示文件

文件： createapp.py 项目： waitingzeng/ttwait-code

 def login(self):
     """Log in through the form served at ``self.loginUrl``.

     Fetches the login page, posts the data built by ``getLoginData`` to
     the form's ``action`` URL and, when the response URL is a
     ``accounts/CheckCookie`` page, follows its ``location.replace``
     redirect.

     :returns: True when the session ends up on appengine.google.com,
         False on every failure path.
     """
     res = self.web.get_page(self.loginUrl)
     if res is None:
         print('login:get %s fail' % self.loginUrl)
         return False
     action = text.get_in(res, 'action="', '"')
     if action is None:
         # No form action found, so there is nothing to post to.
         return False
     self.loginPostUrl = action.replace("&amp;", '&')
     data = self.getLoginData(res)
     if data is None:
         return False
     header = {
         'referer': self.web.resp.url,
     }
     res = self.web.get_page(self.loginPostUrl, data, header)
     if res is None:
         print('login:post %s fail' % self.loginPostUrl)
         return False
     # The decision below is based on the URL reached by the POST, captured
     # before the optional SKIP_URL request.
     respurl = self.web.url
     if respurl.find('WITHOUT_PHONE') != -1:
         self.web.get_page(self.SKIP_URL)
     print(respurl)
     if respurl.find("accounts/CheckCookie") != -1:
         url = text.get_in(res, 'location.replace("', '"')
         if url is None:
             return False
         # The redirect target is embedded as a JavaScript string with
         # \x3d / \x26 escapes for '=' and '&'.
         url = url.replace('\\x3d', '=').replace('\\x26', '&')
         res = self.web.get_page(url)
         # Always return a bool here instead of falling through to None.
         return self.web.resp.url.find('appengine.google.com') != -1
     return respurl.find('https://appengine.google.com') != -1

示例#3

0

显示文件

 def get_auto_refresh_url(self, page):
     """Return the target of a meta-refresh inside ``<noscript>``, if any.

     The first ``<meta http-equiv="refresh" content="N;url=...">`` tag found
     inside the page's ``<noscript>`` section is used.  The method sleeps
     for the announced delay (when above 1 ms) before returning.

     NOTE(review): matching is done on the lower-cased page, so the
     returned URL is lower-cased too -- confirm that is acceptable for
     case-sensitive paths or query strings.

     :param page: page text.
     :returns: the refresh URL passed through ``iso_to_char``, or None when
         no usable refresh tag is present.
     """
     page = page.lower()
     noscript = get_in(page, '<noscript>', '</noscript>')
     if not noscript:
         return None
     for meta in get_in_list(noscript, '<meta', '/>'):
         if get_in(meta, 'http-equiv="', '"') != 'refresh':
             continue
         refresh = get_in(meta, 'content="', '"')
         if not refresh:
             # No content attribute: nothing to parse in this tag.
             continue
         ii = refresh.find(";")
         if ii == -1:
             continue
         newurl_spec = refresh[ii+1:]
         jj = newurl_spec.find("=")
         if jj == -1:
             # Without "key=value" there is no URL; previously this path
             # used unbound (or stale) ``key``/``url`` variables.
             continue
         key, url = newurl_spec[:jj], newurl_spec[jj+1:]
         if key.strip().lower() != "url":
             continue
         try:
             pause = float(refresh[:ii])
         except ValueError:
             # Malformed delay: skip this tag instead of raising.
             continue
         if pause > 1E-3:
             time.sleep(pause)
         return iso_to_char(url)
     return None

示例#4

0

显示文件

文件： msn_live.py 项目： waitingzeng/ttwait-code

    def get_auto_refresh_url(self, page):
        """Return the target of a meta-refresh inside ``<noscript>``, if any.

        The first ``<meta http-equiv="refresh" content="N;url=...">`` tag
        found inside the page's ``<noscript>`` section is used.  The method
        sleeps for the announced delay (when above 1 ms) before returning.

        NOTE(review): matching is done on the lower-cased page, so the
        returned URL is lower-cased too -- confirm that is acceptable for
        case-sensitive paths or query strings.

        :param page: page text.
        :returns: the refresh URL passed through ``iso_to_char``, or None
            when no usable refresh tag is present.
        """
        page = page.lower()
        noscript = get_in(page, '<noscript>', '</noscript>')
        if not noscript:
            return None
        for meta in get_in_list(noscript, '<meta', '/>'):
            if get_in(meta, 'http-equiv="', '"') != 'refresh':
                continue
            refresh = get_in(meta, 'content="', '"')
            if not refresh:
                # No content attribute: nothing to parse in this tag.
                continue
            ii = refresh.find(";")
            if ii == -1:
                continue
            newurl_spec = refresh[ii + 1:]
            jj = newurl_spec.find("=")
            if jj == -1:
                # Without "key=value" there is no URL; previously this path
                # used unbound (or stale) ``key``/``url`` variables.
                continue
            key, url = newurl_spec[:jj], newurl_spec[jj + 1:]
            if key.strip().lower() != "url":
                continue
            try:
                pause = float(refresh[:ii])
            except ValueError:
                # Malformed delay: skip this tag instead of raising.
                continue
            if pause > 1E-3:
                time.sleep(pause)
            return iso_to_char(url)
        return None

示例#5

0

显示文件

文件： msn_live.py 项目： waitingzeng/ttwait-code

    def do_submit(self, page):
        """Submit the ``fmHF`` form contained in ``page``.

        :param page: page text to search for the form.
        :returns: the response of posting the form's hidden fields to its
            ``action`` URL (with the current URL as referer), or ``page``
            unchanged when the form is not present.
        """
        form_html = get_in(page, '<form name="fmHF" id="fmHF"', '</form>')
        if form_html is not None:
            target = get_in(form_html, 'action="', '"')
            fields = get_hidden(form_html)
            return self.web.get_page(target, fields,
                                     {'referer': self.web.url})
        return page

示例#6

0

显示文件

    def do_submit(self, page):
        """Submit the ``fmHF`` form contained in ``page``.

        :param page: page text to search for the form.
        :returns: the response of posting the form's hidden fields to its
            ``action`` URL (with the current URL as referer), or ``page``
            unchanged when the form is not present.
        """
        form_html = get_in(page, '<form name="fmHF" id="fmHF"', '</form>')
        if form_html is not None:
            target = get_in(form_html, 'action="', '"')
            fields = get_hidden(form_html)
            return self.web.get_page(target, fields,
                                     {'referer' : self.web.url})
        return page

示例#7

0

显示文件

 def auto_refresh(self, res):
     """Follow an auto-submitting form or a meta-refresh found in ``res``.

     NOTE(review): the refresh URL is read from the lower-cased page, so it
     is requested in lower case -- confirm that is acceptable.

     :param res: page text, or None.
     :returns: None when ``res`` is None; the result of ``self.web.submit``
         when the page auto-submits on load; the page fetched from the
         refresh URL when the first ``http-equiv="refresh"`` meta tag is
         usable; otherwise ``res`` unchanged.
     """
     if res is None:
         return None
     lower_res = res.lower()
     if lower_res.find('onload="javascript:dosubmit();"') != -1:
         return self.web.submit(res)

     for meta in get_in_list(lower_res, '<meta', '/>'):
         if meta.find('http-equiv="refresh"') == -1:
             continue
         # Only the first refresh tag is considered; any problem with it
         # hands the original page back to the caller.
         refresh = get_in(meta, 'content="', '"')
         if not refresh:
             return res
         ii = refresh.find(";")
         if ii == -1:
             return res
         newurl_spec = refresh[ii+1:]
         jj = newurl_spec.find("=")
         if jj == -1:
             # No "key=value" part; previously this path used unbound
             # ``key``/``newurl`` variables.
             return res
         key, newurl = newurl_spec[:jj], newurl_spec[jj+1:]
         if key.strip().lower() != "url":
             return res
         try:
             pause = float(refresh[:ii])
         except ValueError:
             # Malformed delay: treat the tag as unusable.
             return res
         if pause > 1E-3:
             time.sleep(pause)
         return self.get_page(iso_to_char(newurl))
     return res

示例#8

0

显示文件

文件： check_imap.py 项目： alipro/threebody

    def check_ok_new_emails(self, fid):
        """Look for an ``http...`` link in the newest message of ``fid``.

        Only the last UID reported by the search is fetched.  The message's
        first payload part is decoded, reduced to text via ``pq(...).text()``
        and scanned for the first ``http`` token up to the next space.

        :param fid: mailbox name handed to ``self.conn.select``.
        :returns: ``(mid, url)`` where ``mid`` is group 1 of
            ``RE_BODYSTRUCTURE`` matched against the fetch response header;
            ``(None, None)`` when nothing usable is found or the fetch
            fails.
        """
        self.conn.select(fid)
        # Result unused; the call is kept in case the round-trip matters
        # to the server session -- TODO confirm and drop if not.
        self.conn.list()
        _status, uids = self.conn.uid('search', None, '(UID *:*)')

        if not uids or not uids[0]:
            return None, None
        uids = uids[0].split(' ')[-1:]
        status, data = self.conn.uid("fetch", ",".join(uids), "(UID BODY.PEEK[])")
        if status != 'OK':
            # Keep the 2-tuple shape so callers can always unpack the result.
            return None, None

        for item in reversed(data):
            mo = RE_BODYSTRUCTURE.match(item[0])
            if not mo:
                continue

            mid = mo.group(1)
            msg = email.message_from_string(item[1])
            content = msg.get_payload()[0].get_payload(decode=1)
            content = pq(content).text()
            content = content.replace('\n', ' ')
            print(repr(content))
            key = get_in(content, 'http', ' ')
            if not key:
                continue
            # get_in drops the 'http' marker itself, so it is put back.
            key = "http" + key.strip()
            return mid, key
        return None, None

示例#9

0

显示文件

    def check_new_emails(self, fid):
        """Find the confirmation link in the newest message of ``fid``.

        Only the last UID reported by the search is fetched.  The text
        between ``'To confirm the transaction, go to:'`` and
        ``'To cancel a'`` in the fetched message is returned, stripped.

        :param fid: mailbox name handed to ``self.conn.select``.
        :returns: ``(mid, url)`` where ``mid`` is group 1 of
            ``RE_BODYSTRUCTURE`` matched against the fetch response header;
            ``(None, None)`` when nothing usable is found or the fetch
            fails.
        """
        self.conn.select(fid)
        # Result unused; the call is kept in case the round-trip matters
        # to the server session -- TODO confirm and drop if not.
        self.conn.list()
        _status, uids = self.conn.uid('search', None, '(UID *:*)')

        if not uids or not uids[0]:
            return None, None
        uids = uids[0].split(' ')[-1:]
        status, data = self.conn.uid("fetch", ",".join(uids),
                                     "(UID BODY.PEEK[])")
        if status != 'OK':
            # Keep the 2-tuple shape so callers can always unpack the result.
            return None, None

        for item in reversed(data):
            mo = RE_BODYSTRUCTURE.match(item[0])
            if not mo:
                continue

            mid = mo.group(1)
            key = get_in(item[1], 'To confirm the transaction, go to:',
                         'To cancel a')
            if not key:
                continue
            return mid, key.strip()
        return None, None

示例#10

0

显示文件

文件： check_imap.py 项目： alipro/threebody

    def check_new_emails(self, fid):
        """Find the confirmation link in the newest message of ``fid``.

        Only the last UID reported by the search is fetched.  The text
        between ``'To confirm the transaction, go to:'`` and
        ``'To cancel a'`` in the fetched message is returned, stripped.

        :param fid: mailbox name handed to ``self.conn.select``.
        :returns: ``(mid, url)`` where ``mid`` is group 1 of
            ``RE_BODYSTRUCTURE`` matched against the fetch response header;
            ``(None, None)`` when nothing usable is found or the fetch
            fails.
        """
        self.conn.select(fid)
        # Result unused; the call is kept in case the round-trip matters
        # to the server session -- TODO confirm and drop if not.
        self.conn.list()
        _status, uids = self.conn.uid('search', None, '(UID *:*)')

        if not uids or not uids[0]:
            return None, None
        uids = uids[0].split(' ')[-1:]
        status, data = self.conn.uid("fetch", ",".join(uids), "(UID BODY.PEEK[])")
        if status != 'OK':
            # Keep the 2-tuple shape so callers can always unpack the result.
            return None, None

        for item in reversed(data):
            mo = RE_BODYSTRUCTURE.match(item[0])
            if not mo:
                continue

            mid = mo.group(1)
            key = get_in(item[1], 'To confirm the transaction, go to:', 'To cancel a')
            if not key:
                continue
            return mid, key.strip()
        return None, None

示例#11

0

显示文件

文件： msn_live.py 项目： waitingzeng/ttwait-code

    def auto_refresh(self, res):
        """Follow an auto-submitting form or a meta-refresh found in ``res``.

        NOTE(review): the refresh URL is read from the lower-cased page, so
        it is requested in lower case -- confirm that is acceptable.

        :param res: page text, or None.
        :returns: None when ``res`` is None; the result of
            ``self.web.submit`` when the page auto-submits on load; the page
            fetched from the refresh URL when the first
            ``http-equiv="refresh"`` meta tag is usable; otherwise ``res``
            unchanged.
        """
        if res is None:
            return None
        lower_res = res.lower()
        if lower_res.find('onload="javascript:dosubmit();"') != -1:
            return self.web.submit(res)

        for meta in get_in_list(lower_res, '<meta', '/>'):
            if meta.find('http-equiv="refresh"') == -1:
                continue
            # Only the first refresh tag is considered; any problem with it
            # hands the original page back to the caller.
            refresh = get_in(meta, 'content="', '"')
            if not refresh:
                return res
            ii = refresh.find(";")
            if ii == -1:
                return res
            newurl_spec = refresh[ii + 1:]
            jj = newurl_spec.find("=")
            if jj == -1:
                # No "key=value" part; previously this path used unbound
                # ``key``/``newurl`` variables.
                return res
            key, newurl = newurl_spec[:jj], newurl_spec[jj + 1:]
            if key.strip().lower() != "url":
                return res
            try:
                pause = float(refresh[:ii])
            except ValueError:
                # Malformed delay: treat the tag as unusable.
                return res
            if pause > 1E-3:
                time.sleep(pause)
            return self.get_page(iso_to_char(newurl))
        return res

示例#12

0

显示文件

文件： html.py 项目： waitingzeng/ttwait-code

def get_input(page, type='hidden'):
    """Collect ``name -> value`` pairs from ``<input>`` tags of one type.

    Single quotes inside each tag are normalised to double quotes before
    matching.  Quoted attributes are preferred; an unquoted attribute
    terminated by a space is used as a fallback.

    :param page: text to scan for ``<input ...>`` tags.
    :param type: value of the ``type`` attribute to select (matched as
        ``type="<type>"`` against the lower-cased tag).
    :returns: dict mapping each input's name to its value; inputs without
        a value map to ``''`` and inputs without a name are skipped.
    """
    data = {}
    for tag in get_in_list(page, '<input', '>'):
        tag = tag.replace("'", '"')
        if tag.lower().find('type="%s"' % type) == -1:
            continue
        name = get_in(tag, 'name="', '"')
        if name is None:
            # Fall back to an unquoted name.  A stray ``continue`` used to
            # discard this result, so such inputs were always dropped.
            name = get_in(tag, 'name=', ' ')
            if name is None:
                continue
        value = get_in(tag, 'value="', '"')
        if value is None:
            value = get_in(tag, 'value=', ' ')
            if value is None:
                value = ''
        data[name] = value
    return data

示例#13

0

显示文件

def get_input(page, type='hidden'):
    """Collect ``name -> value`` pairs from ``<input>`` tags of one type.

    Single quotes inside each tag are normalised to double quotes before
    matching.  Quoted attributes are preferred; an unquoted attribute
    terminated by a space is used as a fallback.

    :param page: text to scan for ``<input ...>`` tags.
    :param type: value of the ``type`` attribute to select (matched as
        ``type="<type>"`` against the lower-cased tag).
    :returns: dict mapping each input's name to its value; inputs without
        a value map to ``''`` and inputs without a name are skipped.
    """
    data = {}
    for tag in get_in_list(page, '<input', '>'):
        tag = tag.replace("'", '"')
        if tag.lower().find('type="%s"' % type) == -1:
            continue
        name = get_in(tag, 'name="', '"')
        if name is None:
            # Fall back to an unquoted name.  A stray ``continue`` used to
            # discard this result, so such inputs were always dropped.
            name = get_in(tag, 'name=', ' ')
            if name is None:
                continue
        value = get_in(tag, 'value="', '"')
        if value is None:
            value = get_in(tag, 'value=', ' ')
            if value is None:
                value = ''
        data[name] = value
    return data

示例#14

0

显示文件

    def get_self_canary(self):
        """Return the canary token found on https://profile.live.com/.

        The decoded token is cached on ``self.canary`` and returned directly
        on later calls.

        :returns: the decoded token, or None when the page, the
            ``sn_frInvite`` script section or the token cannot be obtained.
        :raises LoginFailException: when the page contains the
            "confirm whether you have permission to view" prompt text.
        """
        if self.canary:
            return self.canary
        url = "https://profile.live.com/"
        page = self.get_page(url)
        if page is None:
            self.error("get profile page fail")
            return None
        if page.find("确认是否有查看的权限") != -1:
            self.error("login fail")
            raise LoginFailException

        sn_frInvite = get_in(page, "var sn_frInvite", "</script>")
        if not sn_frInvite:
            self.error("get sn_frInvite fail")
            return None
        canary = get_in(sn_frInvite, '"canary":"', '"')
        if not canary:
            self.error("get canary fail")
            return None
        # SECURITY NOTE(review): eval() runs on text taken from a remote
        # page; a crafted value could execute code.  Consider a dedicated
        # escape decoder instead.
        self.canary = str(eval('u"%s"' % canary))
        # Return the decoded value so the first call agrees with the cached
        # value returned by later calls (the raw token was returned before).
        return self.canary

示例#15

0

显示文件

文件： msn_post.py 项目： waitingzeng/ttwait-code

    def get_self_canary(self):
        """Return the canary token found on https://profile.live.com/.

        The decoded token is cached on ``self.canary`` and returned directly
        on later calls.

        :returns: the decoded token, or None when the page, the
            ``sn_frInvite`` script section or the token cannot be obtained.
        :raises LoginFailException: when the page contains the
            "confirm whether you have permission to view" prompt text.
        """
        if self.canary:
            return self.canary
        url = 'https://profile.live.com/'
        page = self.get_page(url)
        if page is None:
            self.error('get profile page fail')
            return None
        if page.find('确认是否有查看的权限') != -1:
            self.error('login fail')
            raise LoginFailException

        sn_frInvite = get_in(page, "var sn_frInvite", "</script>")
        if not sn_frInvite:
            self.error('get sn_frInvite fail')
            return None
        canary = get_in(sn_frInvite, '"canary":"', '"')
        if not canary:
            self.error('get canary fail')
            return None
        # SECURITY NOTE(review): eval() runs on text taken from a remote
        # page; a crafted value could execute code.  Consider a dedicated
        # escape decoder instead.
        self.canary = str(eval('u"%s"' % canary))
        # Return the decoded value so the first call agrees with the cached
        # value returned by later calls (the raw token was returned before).
        return self.canary

示例#16

0

显示文件

文件： createapp.py 项目： waitingzeng/ttwait-code

 def loginSuccess(self, data):
     """Follow the ``CheckCookie`` redirect in ``data`` and load GLOBALS.

     :param data: page text expected to contain a
         ``CheckCookie?continue=<url>`` link.
     :returns: True when ``self.GLOBALS`` was set from the redirected page;
         False when the link, the page or the ``var GLOBALS`` section is
         missing.
     """
     # Raw string so the regex escapes are not treated as string escapes.
     RE_PAGE_REDIRECT = r'CheckCookie\?continue=([^"\']+)'
     mo = re.search(RE_PAGE_REDIRECT, data)
     if mo is None:
         return False
     redirectURL = urllib2.unquote(mo.group(1))
     redirectURL = redirectURL.replace('\\x26', '&')

     pageData = self.web.get_page(redirectURL)
     if pageData is None:
         # The fetch failed; there is nothing to parse.
         return False
     g = text.get_in(pageData, 'var GLOBALS=[,,', ',,')
     if g is None:
         return False
     # SECURITY NOTE(review): eval() runs on text taken from a remote page;
     # a crafted value could execute code.  Consider a safe literal parser.
     self.GLOBALS = eval('[%s]' % g)
     return True

示例#17

0

显示文件

文件： msn_post.py 项目： waitingzeng/ttwait-code

 def get_canary(self, cid):
     """Return the canary token from the profile page of contact ``cid``.

     A token already stored on ``self.canary`` is returned without any
     request; otherwise the decoded token is stored there first.

     :param cid: contact id inserted into the profile URL.
     :returns: the token, or None when the page, the ``sn_frInvite`` script
         section or the token cannot be obtained.
     :raises LoginFailException: when the page contains the
         "confirm whether you have permission to view" prompt text.
     :raises HadAddException: when the page lacks the HTML-entity encoded
         "add as friend" text.
     """
     cached = self.canary
     if cached:
         return cached

     profile_url = 'https://profile.live.com/cid-%s/' % cid
     html = self.web.get_page(profile_url)
     if html is None:
         self.error('get %s page fail' % profile_url)
         return None

     if '确认是否有查看的权限' in html:
         self.error('login fail')
         raise LoginFailException
     if '&#28155;&#21152;&#20026;&#22909;&#21451;' not in html:
         raise HadAddException

     invite_js = get_in(html, "var sn_frInvite", "</script>")
     if not invite_js:
         self.error('get sn_frInvite fail')
         return None

     token = get_in(invite_js, '"canary":"', '"')
     if not token:
         self.error('get canary fail')
         return None

     # NOTE(review): eval() is applied to text taken from a remote page.
     self.canary = str(eval('u"%s"' % token))
     return self.canary

示例#18

0

显示文件

    def _parse_contact_list(self, page):
        """Extract contact records from one contact-list page.

        The page is expected to embed a JSON object assigned to
        ``window.ic_control_data``.  Entries are looked up under the keys
        ``ic2`` .. ``ic1a`` (``"ic%x" % i`` for 2 <= i < 27) and indexed
        positionally, following the page's ControlDataIndex:

            cid:0, showmenu:1, menudefault:2, name:3, contactid:4,
            deccid:5, address:6, membername:7, additionalchickletdata:8,
            thirdpartyname:9, attachmenutobody:10, placedintable:11,
            menucustom:12, tileitemid:13, nameitemid:14, psmitemid:15,
            badgeitemid:16, actiontypeurloverride:17, childicid:18

        When ``self.cid`` is not set yet, it is filled from the page and
        ``self.mailhost`` is filled from ``self.web.url``.

        :param page: text of the contact page, or None.
        :returns: list of ``[cid, contactid, deccid, address, name]`` lists;
            an empty list when ``page`` is None.
        """
        if page is None:
            return []
        if not self.cid:
            self.cid = get_in(page, '"vcid":"', '"')
            self.mailhost = get_in(self.web.url, "//", "/mail/")
        cids = []
        try:
            datatxt = get_in(page, "window.ic_control_data = ", ";")
            data = json.loads(datatxt)
            for i in range(2, 27):
                key = "ic%x" % i
                if key in data:
                    entry = data[key]
                    cids.append([entry[0], entry[4], entry[5], entry[6], entry[3]])
        except Exception as info:
            # Any failure while locating/parsing the data is treated as
            # fatal: it is logged and the process exits.
            self.error(info)
            sys.exit()
        # Callers extend a running list with this result, so it must be
        # returned explicitly.
        return cids

示例#19

0

显示文件

 def get_canary(self, cid):
     """Return the canary token from the profile page of contact ``cid``.

     A token already stored on ``self.canary`` is returned without any
     request; otherwise the decoded token is stored there first.

     :param cid: contact id inserted into the profile URL.
     :returns: the token, or None when the page, the ``sn_frInvite`` script
         section or the token cannot be obtained.
     :raises LoginFailException: when the page contains the
         "confirm whether you have permission to view" prompt text.
     :raises HadAddException: when the page lacks the HTML-entity encoded
         "add as friend" text.
     """
     cached = self.canary
     if cached:
         return cached

     profile_url = "https://profile.live.com/cid-%s/" % cid
     html = self.web.get_page(profile_url)
     if html is None:
         self.error("get %s page fail" % profile_url)
         return None

     if "确认是否有查看的权限" in html:
         self.error("login fail")
         raise LoginFailException
     if "&#28155;&#21152;&#20026;&#22909;&#21451;" not in html:
         raise HadAddException

     invite_js = get_in(html, "var sn_frInvite", "</script>")
     if not invite_js:
         self.error("get sn_frInvite fail")
         return None

     token = get_in(invite_js, '"canary":"', '"')
     if not token:
         self.error("get canary fail")
         return None

     # NOTE(review): eval() is applied to text taken from a remote page.
     self.canary = str(eval('u"%s"' % token))
     return self.canary

示例#20

0

显示文件

    def _login(self, to=None, login_page=None):
        """Log in through the page served at ``self.APP_URL``.

        Both parameters are currently ignored: the login page is always
        fetched afresh from ``self.APP_URL``.

        :returns: True when the session is already logged in (the refresh
            URL contains ``cid-``) or the login ends on
            ``profile.live.com/``; False otherwise.
        :raises DisabledException: when the login lands on a
            ``jsDisabled.srf`` page whose form posts to security.live.com.
        :raises AuthFailException: for any other ``jsDisabled.srf`` landing.
        """
        login_page = self.web.get_page(self.APP_URL)
        if login_page is None:
            return False

        url = self.get_auto_refresh_url(login_page)
        logging.error('url %s', url)
        if url and url.find('cid-') != -1:
            return True

        if url:
            login_page = self.web.get_page(url)
            if login_page is None:
                return False

        post_url = get_in(login_page, "srf_uPost='", "'")
        logging.error('post_url %s', post_url)
        data = get_hidden(login_page)
        data.update({
            'login': self.name,
            'passwd': self.psw,
            'NewUser': '1',
        })

        result = self.web.get_page(post_url, data=data)
        if result is None:
            return False
        if result.find('onload="javascript:DoSubmit();"') != -1:
            active_page = self.web.submit(result)
            result = self.active(active_page)
            if result is None:
                return False

        url = self.get_auto_refresh_url(result)
        logging.error('finish %s', url)
        # get_auto_refresh_url may return None, so test it before .find().
        if url and url.find('profile.live.com/') != -1:
            return True
        # The URL is lower-cased, so the needle must be lower case as well;
        # the mixed-case 'jsDisabled.srf' could never match.
        if self.web.url.lower().find('jsdisabled.srf') != -1:
            if result.find('action="https://security.live.com') != -1:
                self.error('Disable')
                raise DisabledException
            else:
                self.error('Auth Fail')
                raise AuthFailException
        return False

示例#21

0

显示文件

文件： msn_live.py 项目： waitingzeng/ttwait-code

    def _login(self, to=None, login_page=None):
        """Log in through the page served at ``self.APP_URL``.

        Both parameters are currently ignored: the login page is always
        fetched afresh from ``self.APP_URL``.

        :returns: True when the session is already logged in (the refresh
            URL contains ``cid-``) or the login ends on
            ``profile.live.com/``; False otherwise.
        :raises DisabledException: when the login lands on a
            ``jsDisabled.srf`` page whose form posts to security.live.com.
        :raises AuthFailException: for any other ``jsDisabled.srf`` landing.
        """
        login_page = self.web.get_page(self.APP_URL)
        if login_page is None:
            return False

        url = self.get_auto_refresh_url(login_page)
        logging.error('url %s', url)
        if url and url.find('cid-') != -1:
            return True

        if url:
            login_page = self.web.get_page(url)
            if login_page is None:
                return False

        post_url = get_in(login_page, "srf_uPost='", "'")
        logging.error('post_url %s', post_url)
        data = get_hidden(login_page)
        data.update({
            'login': self.name,
            'passwd': self.psw,
            # NOTE(review): this literal looks like a redaction placeholder
            # -- confirm the value the server actually expects.
            'NewUser': '******',
        })

        result = self.web.get_page(post_url, data=data)
        if result is None:
            return False
        if result.find('onload="javascript:DoSubmit();"') != -1:
            active_page = self.web.submit(result)
            result = self.active(active_page)
            if result is None:
                return False

        url = self.get_auto_refresh_url(result)
        logging.error('finish %s', url)
        # get_auto_refresh_url may return None, so test it before .find().
        if url and url.find('profile.live.com/') != -1:
            return True
        # The URL is lower-cased, so the needle must be lower case as well;
        # the mixed-case 'jsDisabled.srf' could never match.
        if self.web.url.lower().find('jsdisabled.srf') != -1:
            if result.find('action="https://security.live.com') != -1:
                self.error('Disable')
                raise DisabledException
            else:
                self.error('Auth Fail')
                raise AuthFailException
        return False

示例#22

0

显示文件

    def get_contact_list(self, filter="", all=True):  # filter = Messenger
        """Fetch contacts from the paged ``ContactMainLight.aspx`` listing.

        The first page is always fetched; its ``<span class="cnt">`` counter
        gives the total number of contacts, at 25 contacts per page.

        :param filter: optional ``CategoryFilter`` value (e.g. Messenger).
        :param all: when true, fetch every page; otherwise only the first.
        :returns: list of records produced by ``_parse_contact_list``.
        """
        baseurl = "https://%s/mail/ContactMainLight.aspx?ContactsSortBy=FileAs&Page=%s&n=%s"
        if filter:
            baseurl = baseurl + "&CategoryFilter=%s" % filter
        p = 1
        # time.time() is sent as the ``n`` query parameter on every request.
        url = baseurl % (self.mailhost, p, time.time())
        page = self.web.get_page(url)

        total = None
        if page is not None:
            total = get_in(page, '<span class="cnt">&#x200e;(', ")")
        if not total:
            total = "1"
        total = int(total)
        # Floor division keeps the page count an int under true division.
        pageNum = (total - 1) // 25 + 1
        # ``or []`` tolerates a parser that returns None.
        cids = self._parse_contact_list(page) or []
        if all:
            for p in range(2, pageNum + 1):
                self.error("begin get contactlist %s page: %s %s", filter, pageNum, p)
                url = baseurl % (self.mailhost, p, time.time())
                page = self.web.get_page(url)
                cids.extend(self._parse_contact_list(page) or [])
        return cids

示例#23

0

显示文件

文件： msn_post.py 项目： waitingzeng/ttwait-code

    def get_contact_list(self, filter='', all=True):  #filter = Messenger
        """Fetch contacts from the paged ``ContactMainLight.aspx`` listing.

        The first page is always fetched; its ``<span class="cnt">`` counter
        gives the total number of contacts, at 25 contacts per page.

        :param filter: optional ``CategoryFilter`` value (e.g. Messenger).
        :param all: when true, fetch every page; otherwise only the first.
        :returns: list of records produced by ``_parse_contact_list``.
        """
        baseurl = 'https://%s/mail/ContactMainLight.aspx?ContactsSortBy=FileAs&Page=%s&n=%s'
        if filter:
            baseurl = baseurl + '&CategoryFilter=%s' % filter
        p = 1
        # time.time() is sent as the ``n`` query parameter on every request.
        url = baseurl % (self.mailhost, p, time.time())
        page = self.web.get_page(url)

        total = None
        if page is not None:
            total = get_in(page, '<span class="cnt">&#x200e;(', ')')
        if not total:
            total = '1'
        total = int(total)
        # Floor division keeps the page count an int under true division.
        pageNum = (total - 1) // 25 + 1
        # ``or []`` tolerates a parser that returns None.
        cids = self._parse_contact_list(page) or []
        if all:
            for p in range(2, pageNum + 1):
                self.error('begin get contactlist %s page: %s %s', filter,
                           pageNum, p)
                url = baseurl % (self.mailhost, p, time.time())
                page = self.web.get_page(url)
                cids.extend(self._parse_contact_list(page) or [])
        return cids

示例#24

0

显示文件

    def check_ok_new_emails(self, fid):
        """Look for an ``http...`` link in the newest message of ``fid``.

        Only the last UID reported by the search is fetched.  The message's
        first payload part is decoded, reduced to text via ``pq(...).text()``
        and scanned for the first ``http`` token up to the next space.

        :param fid: mailbox name handed to ``self.conn.select``.
        :returns: ``(mid, url)`` where ``mid`` is group 1 of
            ``RE_BODYSTRUCTURE`` matched against the fetch response header;
            ``(None, None)`` when nothing usable is found or the fetch
            fails.
        """
        self.conn.select(fid)
        # Result unused; the call is kept in case the round-trip matters
        # to the server session -- TODO confirm and drop if not.
        self.conn.list()
        _status, uids = self.conn.uid('search', None, '(UID *:*)')

        if not uids or not uids[0]:
            return None, None
        uids = uids[0].split(' ')[-1:]
        status, data = self.conn.uid("fetch", ",".join(uids),
                                     "(UID BODY.PEEK[])")
        if status != 'OK':
            # Keep the 2-tuple shape so callers can always unpack the result.
            return None, None

        for item in reversed(data):
            mo = RE_BODYSTRUCTURE.match(item[0])
            if not mo:
                continue

            mid = mo.group(1)
            msg = email.message_from_string(item[1])
            content = msg.get_payload()[0].get_payload(decode=1)
            content = pq(content).text()
            content = content.replace('\n', ' ')
            print(repr(content))
            key = get_in(content, 'http', ' ')
            if not key:
                continue
            # get_in drops the 'http' marker itself, so it is put back.
            key = "http" + key.strip()
            return mid, key
        return None, None