class katsomo():
    """Small HTTP client for the mobile Katsomo site (m.katsomo.fi).

    Keeps a cookie jar (optionally backed by an LWP cookie file), an
    opener wired to that jar, and an iPhone user-agent string that is
    sent with every request made through :meth:`getPage`.
    """

    # Endpoints of the mobile site, keyed by what they are used for.
    URL = {
        'login': 'http://m.katsomo.fi/katsomo/login',
        'programdir': 'http://m.katsomo.fi/katsomo/programs',
        'programs': 'http://m.katsomo.fi/katsomo/?treeId=',
        'videolink': 'http://m.katsomo.fi/?progId=',
    }
    # NOTE(review): not read anywhere in this class; presumably toggled by
    # callers to request a cache flush -- confirm against the call sites.
    clearCache = False

    def __init__(self, username="", password="", cookie_file=""):
        """Set up the cookie jar, the opener and the credentials.

        Args:
            username: Account name; an empty string means anonymous use
                (``noLogin`` is set to True).
            password: Account password.
            cookie_file: Path of an LWP cookie file to restore cookies
                from. A missing or unreadable file is ignored.
        """
        self.cj = LWPCookieJar(cookie_file)
        self.noLogin = username == ""
        # Always stored so attribute access never fails for anonymous use.
        self.username = username
        self.password = password
        try:
            self.cj.revert(ignore_discard=True)
        except IOError:
            # No (readable) cookie file yet: start with an empty jar.
            pass
        # NOTE(review): 'hq' presumably asks the site for high-quality
        # streams -- confirm.
        self.cj.set_cookie(self.makeCookie('hq', '1'))
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        self.user_agent = 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A334 Safari/7534.48.3'

    def makeCookie(self, name, value):
        """Return a persistent version-0 cookie for ``katsomo.fi`` at path ``/``.

        Args:
            name: Cookie name.
            value: Cookie value.
        """
        return Cookie(
            version=0,
            name=name,
            value=value,
            port=None,
            port_specified=False,
            domain="katsomo.fi",
            domain_specified=True,
            domain_initial_dot=False,
            path="/",
            path_specified=True,
            secure=False,
            expires=None,
            discard=False,
            comment=None,
            comment_url=None,
            # Must be a dict: the LWP serializer iterates its keys when the
            # jar is saved, so None would raise at save time.
            rest={},
        )

    def getPage(self, url, postvars=None, header_data=None):
        """Fetch ``url`` through the cookie-aware opener and return the body.

        The url-encoded ``postvars`` are always attached as request data,
        even when empty.

        Args:
            url: Address to request.
            postvars: Mapping (or sequence of pairs) of form fields;
                defaults to no fields.
            header_data: Extra request headers; defaults to none. The
                client's user-agent header is added on top.

        Returns:
            The raw response body.

        Raises:
            NetworkError: If the request fails with an HTTPError or URLError.
        """
        if postvars is None:
            postvars = {}
        if header_data is None:
            header_data = {}
        req = urllib2.Request(url, urllib.urlencode(postvars), header_data)
        req.add_header('user-agent', self.user_agent)
        try:
            handle = self.opener.open(req)
            try:
                return handle.read()
            finally:
                # Release the connection even if read() fails.
                handle.close()
        except HTTPError as error:
            # HTTPError is a URLError subclass, so it must be caught first.
            raise NetworkError('HTTPError: %s' % error)
        except URLError as error:
            raise NetworkError('URLError: %s' % error)
class zhihuCrawler(object):
    """Crawler state for zhihu.com: login parameters plus a cookie jar.

    Creating an instance installs a process-wide urllib2 opener that uses
    the instance's cookie jar.
    """

    def __init__(self, login_url, login_info, header):
        """Store the login settings and install a cookie-aware opener.

        Args:
            login_url: URL of the login endpoint.
            login_info: Login form data (stored as given).
            header: Request headers (stored as given).
        """
        self.base_url = 'http://www.zhihu.com'
        self.login_url = login_url
        self.login_info = login_info
        self.header = header
        self.cookies = LWPCookieJar('cookies.jar')
        try:
            self.cookies.revert()
        except IOError:
            # The cookie library reports a missing or unparsable file as
            # IOError (LoadError is a subclass); continue with an empty jar.
            print('First time to login, setting up cookies...')
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookies))
        # Global side effect: later urllib2.urlopen() calls share this jar.
        urllib2.install_opener(opener)