Example #1
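# A sketch of the imports this example needs (the original listing omits
# them). Module-level constants such as APPKEY, APPSECRET, HOME_URL, CATEGORY,
# ORDER and the various *_URL templates, as well as the project-local `utils`
# helper that wraps HTTP requests, are assumed to be defined elsewhere in the
# plugin.
import base64
import hashlib
import json
import os
import re
import tempfile
import urllib
import urllib2
from cookielib import LWPCookieJar
from xml.dom import minidom

import requests
from bs4 import BeautifulSoup

import utils  # project-local module providing get_page_content()
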
class Bilibili(object):
    def __init__(self, appkey=APPKEY, appsecret=APPSECRET):
        self.appkey = appkey
        self.appsecret = appsecret
        self.is_login = False
        cookie_path = os.path.dirname(os.path.abspath(__file__)) + '/.cookie'
        self.cj = LWPCookieJar(cookie_path)
        if os.path.isfile(cookie_path):
            self.cj.load()
            cookies = requests.utils.dict_from_cookiejar(self.cj)
            if 'DedeUserID' in cookies:
                self.is_login = True
                self.mid = str(cookies['DedeUserID'])
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        urllib2.install_opener(opener)

    def get_captcha(self, path=None):
        # Prime the session cookies before requesting the captcha image.
        utils.get_page_content('https://passport.bilibili.com/login')
        result = utils.get_page_content(
            LOGIN_CAPTCHA_URL,
            headers={
                'Referer':
                'https://passport.bilibili.com/ajax/miniLogin/minilogin'
            })
        if path is None:
            path = os.path.join(tempfile.gettempdir(), 'captcha.jpg')
        with open(path, 'wb') as f:
            f.write(result)
        return path

    def get_encryped_pwd(self, pwd):
        # The login endpoint expects the password prefixed with a server-side
        # hash, RSA-encrypted with the server-supplied public key, then
        # base64-encoded and URL-quoted.
        import rsa
        result = json.loads(
            utils.get_page_content(
                LOGIN_HASH_URL,
                headers={
                    'Referer':
                    'https://passport.bilibili.com/ajax/miniLogin/minilogin'
                }))
        pwd = result['hash'] + pwd
        key = result['key']
        pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(key)
        pwd = rsa.encrypt(pwd.encode('utf-8'), pub_key)
        pwd = base64.b64encode(pwd)
        pwd = urllib.quote(pwd)
        return pwd

    def api_sign(self, params):
        """Build the sorted, URL-encoded query string and append its
        md5-based signature, as the Bilibili API requires."""
        params['appkey'] = self.appkey
        data = ""
        for key in sorted(params.keys()):
            if data != "":
                data += "&"
            value = params[key]
            if isinstance(value, int):
                value = str(value)
            data += key + "=" + urllib.quote(value)
        if self.appsecret is None:
            return data
        m = hashlib.md5()
        m.update(data + self.appsecret)
        return data + '&sign=' + m.hexdigest()
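    # Example of the scheme above, with hypothetical values: for
    # params={'page': 1} and appkey='abc', the canonical query is
    # 'appkey=abc&page=1', and the returned string is
    # 'appkey=abc&page=1&sign=' + md5('appkey=abc&page=1' + appsecret).hexdigest()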

    def get_category_from_web_page(self):
        category_dict = {'0': {'title': u'全部', 'url': HOME_URL, 'subs': []}}
        node = category_dict['0']
        url = node['url']
        result = BeautifulSoup(utils.get_page_content(url),
                               "html.parser").findAll('li', {'class': 'm-i'})
        for item in result:
            if len(item['class']) != 1:
                continue
            tid = item['data-tid']
            title = item.em.contents[0]
            url = 'http:' + item.a['href']
            category_dict[tid] = {'title': title, 'url': url, 'subs': []}
            node['subs'].append(tid)

        # Fix-up: make sure the TV-series (11) and movie (23) categories exist
        if '11' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('11')
        if '23' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('23')
        category_dict['11'] = {
            'title': u'电视剧',
            'url': 'http://bangumi.bilibili.com/tv/',
            'subs': []
        }
        category_dict['23'] = {
            'title': u'电影',
            'url': 'http://bangumi.bilibili.com/movie/',
            'subs': []
        }

        for sub in category_dict['0']['subs']:
            node = category_dict[sub]
            url = node['url']
            result = BeautifulSoup(utils.get_page_content(url),
                                   "html.parser").select('ul.n_num li')
            for item in result[1:]:
                if not item.has_attr('tid'):
                    continue
                if item.a is None:  # bs4 returns None for a missing <a>, so hasattr() would always be True
                    continue
                if item.has_attr('class'):
                    continue
                tid = item['tid']
                title = item.a.contents[0]
                url = HOME_URL + item.a['href']
                category_dict[tid] = {'title': title, 'url': url, 'subs': []}
                node['subs'].append(tid)
        return category_dict

    def get_category(self, tid='0'):
        items = [{
            tid: {
                'title': u'全部',
                'url': CATEGORY[tid]['url'],
                'subs': []
            }
        }]
        for sub in CATEGORY[tid]['subs']:
            items.append({sub: CATEGORY[sub]})
        return items

    def get_category_name(self, tid):
        return CATEGORY[str(tid)]['title']

    def get_order(self):
        return ORDER

    def get_category_list(self,
                          tid=0,
                          order='default',
                          days=30,
                          page=1,
                          pagesize=10):
        params = {
            'tid': tid,
            'order': order,
            'days': days,
            'page': page,
            'pagesize': pagesize
        }
        url = LIST_URL.format(self.api_sign(params))
        result = json.loads(utils.get_page_content(url))
        results = []
        for i in range(pagesize):
            if str(i) in result['list']:
                results.append(result['list'][str(i)])
            else:
                break
        return results, result['pages']

    def get_my_info(self):
        if not self.is_login:
            return []
        result = json.loads(utils.get_page_content(MY_INFO_URL))
        return result['data']

    def get_bangumi_chase(self, page=1, pagesize=10):
        if not self.is_login:
            return []
        url = BANGUMI_CHASE_URL.format(self.mid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['result'], result['data']['pages']

    def get_bangumi_detail(self, season_id):
        url = BANGUMI_SEASON_URL.format(season_id)
        result = utils.get_page_content(url)
        if result[0] != '{':  # JSONP response: strip the 'callback(...);' wrapper
            start = result.find('(') + 1
            end = result.find(');')
            result = result[start:end]
        result = json.loads(result)
        return result['result']

    def get_history(self, page=1, pagesize=10):
        if not self.is_login:
            return []
        url = HISTORY_URL.format(page, pagesize)
        result = json.loads(utils.get_page_content(url))
        # The endpoint reports no total page count: if a full page came back,
        # assume at least one more page exists.
        if len(result['data']) >= int(pagesize):
            total_page = int(page) + 1
        else:
            total_page = int(page)
        return result['data'], total_page

    def get_dynamic(self, page=1, pagesize=10):
        if not self.is_login:
            return []
        url = DYNAMIC_URL.format(pagesize, page)
        result = json.loads(utils.get_page_content(url))
        # Ceiling division: number of pages needed to cover all feed entries.
        total_page = (result['data']['page']['count'] + pagesize - 1) // pagesize
        return result['data']['feeds'], total_page

    def get_attention(self, page=1, pagesize=10):
        if not self.is_login:
            return []
        url = ATTENTION_URL.format(self.mid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list'], result['data']['pages']

    def get_attention_video(self, mid, tid=0, page=1, pagesize=10):
        if not self.is_login:
            return []
        url = ATTENTION_VIDEO_URL.format(mid, page, pagesize, tid)
        result = json.loads(utils.get_page_content(url))
        return result['data'], result['data']['pages']

    def get_attention_channel(self, mid):
        if not self.is_login:
            return []
        url = ATTENTION_CHANNEL_URL.format(mid)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list']

    def get_attention_channel_list(self, mid, cid, page=1, pagesize=10):
        if not self.is_login:
            return []
        url = ATTENTION_CHANNEL_LIST_URL.format(mid, cid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list'], result['data']['total']

    def get_fav_box(self):
        if not self.is_login:
            return []
        url = FAV_BOX_URL.format(self.mid)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list']

    def get_fav(self, fav_box, page=1, pagesize=10):
        if not self.is_login:
            return []
        url = FAV_URL.format(self.mid, page, pagesize, fav_box)
        result = json.loads(utils.get_page_content(url))
        return result['data']['vlist'], result['data']['pages']

    def login(self, userid, pwd, captcha):
        #utils.get_page_content('http://www.bilibili.com')
        if self.is_login:
            return True, ''
        pwd = self.get_encryped_pwd(pwd)
        data = 'userid={0}&pwd={1}&keep=1&captcha={2}'.format(
            userid, pwd, captcha)
        result = utils.get_page_content(
            LOGIN_URL, data, {
                'Origin': 'https://passport.bilibili.com',
                'Referer':
                'https://passport.bilibili.com/ajax/miniLogin/minilogin'
            })
        if 'DedeUserID' not in requests.utils.dict_from_cookiejar(self.cj):
            return False, LOGIN_ERROR_MAP[json.loads(result)['message']
                                          ['code']]
        self.cj.save()
        self.is_login = True
        self.mid = str(
            requests.utils.dict_from_cookiejar(self.cj)['DedeUserID'])
        return True, ''

    def logout(self):
        self.cj.clear()
        self.cj.save()
        self.is_login = False

    def get_av_list_detail(self, aid, page=1, fav=0, pagesize=10):
        params = {'id': aid, 'page': page}
        if fav != 0:
            params['fav'] = fav
        url = VIEW_URL.format(self.api_sign(params))
        result = json.loads(utils.get_page_content(url))
        results = [result]
        # Fetch the following pages recursively; `pagesize` acts as the
        # remaining-page budget for the recursion.
        if (int(page) < result['pages']) and (pagesize > 1):
            results += self.get_av_list_detail(aid,
                                               int(page) + 1,
                                               fav,
                                               pagesize=pagesize - 1)[0]
        return results, result['pages']

    def get_av_list(self, aid):
        url = AV_URL.format(aid)
        result = json.loads(utils.get_page_content(url))
        return result

    def get_video_urls(self, cid):
        m = hashlib.md5()
        m.update(INTERFACE_PARAMS.format(str(cid), SECRETKEY_MINILOADER))
        url = INTERFACE_URL.format(str(cid), m.hexdigest())
        doc = minidom.parseString(utils.get_page_content(url))
        urls = [
            durl.getElementsByTagName('url')[0].firstChild.nodeValue
            for durl in doc.getElementsByTagName('durl')
        ]
        # Rewrite *.qqvideo.tc.qq.com hosts to vsrc.store.qq.com.
        urls = [
            url if not re.match(r'.*\.qqvideo\.tc\.qq\.com', url) else re.sub(
                r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', url)
            for url in urls
        ]
        return urls

    def add_history(self, aid, cid):
        url = ADD_HISTORY_URL.format(str(cid), str(aid))
        utils.get_page_content(url)
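
A minimal usage sketch, not part of the original listing: the user id, password, captcha text and tid below are placeholder values, and the module constants and `utils` helper are assumed to be available as noted above.

client = Bilibili()
if not client.is_login:
    captcha_path = client.get_captcha()  # downloads the captcha image to a temp file
    # ...show the image to the user, then collect credentials and captcha text...
    ok, message = client.login('user@example.com', 'secret', 'captcha-text')
    if not ok:
        print message
videos, pages = client.get_category_list(tid=1, order='default', page=1)
print 'page 1 of %d: %d videos' % (pages, len(videos))
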
Example #2
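# Names used below but defined elsewhere in the smsGV module (LOGIN_URL,
# SMSLIST_M_URL, SMSLIST_URL, SEND_SMS_URL, GVConversation, NotLoggedIn,
# _simple_post) are assumed to be in scope; third-party imports (lxml, cjson)
# are done lazily inside the methods, in the original author's style.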
class GVAccount(object):
    """Handles account-related functions of Google Voice for smsGV."""
    def __init__(self, username, password):
        self.id             = None
        self.username       = str(username)
        from cookielib import LWPCookieJar
        self.cookies        = LWPCookieJar("%s.lwp" % self.username) # Named using the username to prevent overlap with another user.
        # TODO: Switch to a UUID-based cookie filename, defaulting to the username on older Python versions.
        # TODO: Evaluate cookie_jar.save() and .load() to add a failsafe in case a re-login is needed.
        self.logged_in      = False
        self.last_time      = 0
        self.temp_time      = 0
        self.initialized    = False
        self.conversations  = {}
        self.login(password)
    
    def __str__(self):
        return '%s' % self.username
    
    def __find_id(self):
        """Finds account ID used by Google to authenticate SMS sending."""
        # Google has a special little id that they require is included in sending messages
        from urllib2 import Request, urlopen
        handle = urlopen(Request(SMSLIST_M_URL))
        from lxml import html
        parsed_html = html.fromstring(handle.read())
        # Grab UID from the page and set it.
        self.id = parsed_html.forms[0].inputs["_rnr_se"].value
    
    def login(self, password):
        """Logs into the Google Account system and receives the cookies into a
        file to allow continued use of the Google Voice system."""
        if not self.logged_in:  # We don't need to repeat this process over and over
            from urllib2 import HTTPCookieProcessor, build_opener, install_opener, Request, urlopen
            opener = build_opener(HTTPCookieProcessor(self.cookies))
            install_opener(opener) # Let Google know we'll accept its nomtastic cookies
            from urllib import urlencode
            form = urlencode({ # Will be pushed to Google via POST
                'continue': SMSLIST_M_URL,
                'Email': self.username,
                'Passwd': password,
                'PersistentCookies': 'yes',
            })
            urlopen(Request(LOGIN_URL, form, {'Content-type': 'application/x-www-form-urlencoded'}))
            self.logged_in = True
            self.__find_id()
        self.check_sms()    # Initialization check
        self.initialized = True
    
    def logout(self):
        """Unsets all cookies and resets variables with semi-sensitive data."""
        self.cookies.clear()
        self.id             = None
        self.initialized    = False
        self.logged_in      = False
        self.conversations  = {}
    
    def send_sms(self, number, message):
        """Sends a text message (SMS) to any supplied number. Seems to bypass 2
        message limit imposed by Google Voice's website. Please do not abuse this!"""
        if self.logged_in:
            _simple_post(self.id, SEND_SMS_URL, {
                'number':   number,
                'smstext':  message,
            })
        else:
            raise NotLoggedIn(self.username)
    
    def __find_conversations(self, sms_list):
        """Parses the HTML received from Google's servers to receive only the relevant messages."""
        import cjson
        json = cjson.decode(sms_list.find('.//json').text)
        # We only want to get relevant conversations:
        # unread messages from within the past twenty-four hours.
        from time import time
        for conversation_data in json['messages'].itervalues():
            # Google's timestamps have millisecond precision.
            conversation_data['startTime'] = float(conversation_data['startTime'])/1000
            # Checks for multiple things:
            #   - Message is a SMS in the inbox
            #   - Message is unread
            #   - Message is within the past 24 hours (86,400 seconds)
            if (time() - conversation_data['startTime'] < 86400) \
               and (('sms' in conversation_data['labels'] \
                    and 'inbox' in conversation_data['labels']) \
                    or ('spam' in conversation_data['labels'])): # TODO: Support spam/archive
                # If not initialized, then the -very- last message sent is
                # found. This is used when later detecting new messages.
                if int(conversation_data['startTime']) > self.last_time:
                    self.temp_time = conversation_data['startTime']
                    if conversation_data['id'] in self.conversations:
                        # Refresh an existing conversation in place.
                        conversation = self.conversations[conversation_data['id']]
                        conversation.clear_local_messages()
                        if conversation.note != conversation_data['note']:
                            conversation.note = conversation_data['note']
                        if conversation.read != conversation_data['isRead']:
                            conversation.read = conversation_data['isRead']
                        if conversation.spam != conversation_data['isSpam']:
                            conversation.spam = conversation_data['isSpam']
                        if conversation.deleted != conversation_data['isTrash']:
                            conversation.deleted = conversation_data['isTrash']
                        if conversation.starred != conversation_data['star']:
                            conversation.starred = conversation_data['star']
                    else:
                        self.conversations[conversation_data['id']] = GVConversation(self,
                            conversation_data['id'],
                            conversation_data['phoneNumber'],
                            conversation_data['displayNumber'],
                            conversation_data['note'],
                            conversation_data['isRead'],
                            conversation_data['isSpam'],
                            conversation_data['isTrash'],
                            conversation_data['star'])
        if self.temp_time == 0:
            self.temp_time = self.last_time
    
    def __check_conversations(self, sms_list, page='inbox'):
        for cid in self.conversations.keys():  # copy the keys: entries may be deleted during iteration
            # Traverses down the DOM to get to the proper div that contains all of the SMS data
            # The -1 brings us to the end to retrieve the very last message.
            try:
                conversation_data = (sms_list.find_class('gc-message')[0]
                                     .getparent().get_element_by_id(cid)
                                     .find_class('gc-message-message-display')[-1])
            except KeyError:
                if (self.conversations[cid].spam and page == 'spam') \
                  or (not self.conversations[cid].spam and page != 'spam'):
                    del self.conversations[cid]
            except IndexError:
                pass # Happens because of an empty page
            else:
                self.conversations[cid].find_messages(conversation_data)
    
    def __get_page(self, url):
        from urllib2 import Request, urlopen
        sms_list = urlopen(Request(url))
        from lxml import html
        # Strip CDATA tags
        sms_list = sms_list.read()
        sms_list = sms_list.replace('<![CDATA[', '').replace(']]>', '')
        sms_list = html.document_fromstring(sms_list)
        return sms_list
    
    def check_sms(self):
        """Retrieves the SMS messages from Google's servers to pass to the Parse function."""
        if self.logged_in:
            sms_list_inbox  = self.__get_page(SMSLIST_URL[0])
            sms_list_spam   = self.__get_page(SMSLIST_URL[1])
            self.__find_conversations (sms_list_inbox)
            self.__check_conversations(sms_list_inbox)
            self.__find_conversations (sms_list_spam)
            self.__check_conversations(sms_list_spam, 'spam')
        else:
            raise NotLoggedIn(self.username)
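
A minimal usage sketch, not part of the original module: the credentials and phone number are placeholders, and GVConversation, NotLoggedIn and the URL constants are assumed to be defined as noted above.

account = GVAccount('user@gmail.com', 'password')  # logs in and runs an initial check_sms()
try:
    account.send_sms('+15555550123', 'Hello from smsGV!')
except NotLoggedIn:
    print 'Not logged in as %s' % account
account.check_sms()  # poll for new messages
for cid, conversation in account.conversations.iteritems():
    print cid, conversation
account.logout()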