def __init__(self, username=None, password=None, firefox_cookies=None, parser=None, history=NoHistory(), proxy=None, logger=None):
    """
    Constructor of Browser.

    @param username [str] username on website.
    @param password [str] password on website. When it is empty or None,
                          the home page is not requested at construction.
    @param firefox_cookies [str] Path to cookies' sqlite file. Only used
                          when HAVE_COOKIES is true.
    @param parser [IParser] parser to use on HTML files. None selects
                          get_parser()(); a tuple or list is handed to
                          get_parser() and the result is instantiated.
    @param history [object] History manager, forwarded to
                          mechanize.Browser. Default value is an object
                          which does not keep history.
    @param proxy [str] proxy URL to use. Without a '://' separator the
                          'http' scheme is assumed.
    @param logger [object] forwarded to getLogger('browser', logger).
    """
    mechanize.Browser.__init__(self, history=history)
    self.logger = getLogger('browser', logger)
    self.addheaders = [['User-agent', self.USER_AGENT]]

    # Proxy: split an optional "scheme://" prefix off the given URL.
    self.proxy = proxy
    if proxy:
        if '://' in proxy:
            scheme, host = proxy.split('://', 1)
        else:
            scheme, host = 'http', proxy
        self.set_proxies({scheme: host})

    # Cookies shared with Firefox, when requested and supported.
    if not (firefox_cookies and HAVE_COOKIES):
        self._cookie = None
    else:
        self._cookie = FirefoxCookieJar(self.DOMAIN, firefox_cookies)
        self._cookie.load()
        self.set_cookiejar(self._cookie)

    # Parser: default one, one looked up from a tuple/list, or the object
    # supplied by the caller.
    if parser is None:
        self.parser = get_parser()()
    elif isinstance(parser, (tuple, list)):
        self.parser = get_parser(parser)()
    else:
        self.parser = parser

    self.page = None
    self.last_update = 0.0
    self.username = username
    self.password = password
    self.lock = RLock()

    if self.password:
        try:
            self.home()
        except BrowserUnavailable:
            # Do not abort the build of browser when the website is down.
            pass
def __init__(
    self,
    firefox_cookies=None,
    parser=None,
    history=NoHistory(),
    proxy=None,
    logger=None,
    factory=None,
    responses_dirname=None,
):
    """
    Set up the mechanize browser, its proxy, cookies and parser.

    :param firefox_cookies: when truthy (and HAVE_COOKIES is true), passed
        with ``self.DOMAIN`` to FirefoxCookieJar; the jar is loaded and
        installed with ``set_cookiejar``.
    :param parser: parser object to store on ``self.parser``. None selects
        ``get_parser()()``; a tuple, list or string is handed to
        ``get_parser()`` and the result is instantiated.
    :param history: forwarded to ``mechanize.Browser.__init__``.
    :param proxy: proxy URL. If it contains ``://`` its scheme and netloc
        are used, otherwise the ``http`` scheme is assumed.
    :param logger: forwarded to ``getLogger("browser", logger)``.
    :param factory: forwarded to ``mechanize.Browser.__init__``.
    :param responses_dirname: stored as ``self.responses_dirname``.
    """
    mechanize.Browser.__init__(self, history=history, factory=factory)
    self.logger = getLogger("browser", logger)
    self.addheaders = [["User-agent", self.USER_AGENT]]

    # Proxy: derive the {scheme: host} mapping expected by set_proxies().
    self.proxy = proxy
    if proxy:
        if "://" in proxy:
            pieces = urlsplit(proxy)
            scheme, host = pieces.scheme, pieces.netloc
        else:
            scheme, host = "http", proxy
        self.set_proxies({scheme: host})

    # Cookies shared with Firefox, when requested and supported.
    if not (firefox_cookies and HAVE_COOKIES):
        self._cookie = None
    else:
        self._cookie = FirefoxCookieJar(self.DOMAIN, firefox_cookies)
        self._cookie.load()
        self.set_cookiejar(self._cookie)

    # Parser: default one, one looked up by name/sequence, or the object
    # supplied by the caller.
    if parser is None:
        self.parser = get_parser()()
    elif isinstance(parser, (tuple, list, str, unicode)):
        self.parser = get_parser(parser)()
    else:
        self.parser = parser

    self.lock = RLock()

    if self.DEBUG_HTTP:
        # Display messages from httplib.
        self.set_debug_http(True)
    if self.DEBUG_MECHANIZE:
        # Enable log messages from mechanize.Browser.
        self.set_debug_redirects(True)

    self.responses_dirname = responses_dirname
def __init__(self, firefox_cookies=None, parser=None, history=NoHistory(), proxy=None, logger=None, factory=None, responses_dirname=None):
    """
    Build the browser: mechanize base, proxy, cookie jar, parser, debugging.

    :param firefox_cookies: when truthy (and HAVE_COOKIES is true), passed
        with ``self.DOMAIN`` to FirefoxCookieJar; the jar is loaded and
        installed with ``set_cookiejar``.
    :param parser: parser object to keep on ``self.parser``. None selects
        ``get_parser()()``; a tuple, list or string is handed to
        ``get_parser()`` and the result is instantiated.
    :param history: forwarded to ``mechanize.Browser.__init__``.
    :param proxy: proxy URL. If it contains ``://`` its scheme and netloc
        are used, otherwise the ``http`` scheme is assumed.
    :param logger: forwarded to ``getLogger('browser', logger)``.
    :param factory: forwarded to ``mechanize.Browser.__init__``.
    :param responses_dirname: stored as ``self.responses_dirname``.
    """
    mechanize.Browser.__init__(self, history=history, factory=factory)
    self.logger = getLogger('browser', logger)
    user_agent_header = ['User-agent', self.USER_AGENT]
    self.addheaders = [user_agent_header]

    # Use a proxy
    self.proxy = proxy
    if proxy:
        if '://' not in proxy:
            # Bare "host[:port]" form: default to plain HTTP.
            proxy_scheme, proxy_host = 'http', proxy
        else:
            split_url = urlsplit(proxy)
            proxy_scheme = split_url.scheme
            proxy_host = split_url.netloc
        self.set_proxies({proxy_scheme: proxy_host})

    # Share cookies with firefox
    share_with_firefox = firefox_cookies and HAVE_COOKIES
    if share_with_firefox:
        self._cookie = FirefoxCookieJar(self.DOMAIN, firefox_cookies)
        self._cookie.load()
        self.set_cookiejar(self._cookie)
    else:
        self._cookie = None

    # Resolve the parser: default, looked up from a description, or as given.
    if parser is None:
        resolved_parser = get_parser()()
    elif isinstance(parser, (tuple, list, basestring)):
        resolved_parser = get_parser(parser)()
    else:
        resolved_parser = parser
    self.parser = resolved_parser

    self.lock = RLock()

    # Optional debugging output, driven by class-level switches.
    if self.DEBUG_HTTP:
        # display messages from httplib
        self.set_debug_http(True)
    if self.DEBUG_MECHANIZE:
        # Enable log messages from mechanize.Browser
        self.set_debug_redirects(True)

    self.responses_dirname = responses_dirname
def get_current(self):
    """
    Return the (artist, title) pair currently shown by the "on the air" page.

    The response of ``self.browser.location()`` is indexed with
    ``['track']['markup']``; that markup is parsed with the default parser
    and the text of the first ``div`` of class ``artist`` and of class
    ``title`` is extracted.

    :returns: tuple of two stripped unicode strings ``(artist, title)``
    :raises IndexError: if either ``div`` is missing from the markup
    """
    payload = self.browser.location('http://www.novaplanet.com/radionova/ontheair?origin=/')
    markup = payload['track']['markup']
    tree = get_parser()().parse(StringIO(markup))

    def joined_text(expression):
        # Text fragments of the first matching node, each stripped and
        # joined with single spaces.
        node = tree.xpath(expression)[0]
        return u' '.join([fragment.strip() for fragment in node.itertext()])

    artist = joined_text('//div[@class="artist"]')
    title = joined_text('//div[@class="title"]')
    return unicode(artist).strip(), unicode(title).strip()
def __init__(self, firefox_cookies=None, parser=None, history=NoHistory(), proxy=None, logger=None, factory=None, responses_dirname=None):
    """
    Build the browser: mechanize base, proxy, cookie jar, parser, logging.

    :param firefox_cookies: when truthy (and HAVE_COOKIES is true), passed
        with ``self.DOMAIN`` to FirefoxCookieJar; the jar is loaded and
        installed with ``set_cookiejar``.
    :param parser: parser object to keep on ``self.parser``. None selects
        ``get_parser()()``; a tuple, list or string is handed to
        ``get_parser()`` and the result is instantiated.
    :param history: forwarded to ``mechanize.Browser.__init__``.
    :param proxy: passed unchanged to ``set_proxies()`` unless it is None.
    :param logger: forwarded to ``getLogger('browser', logger)``.
    :param factory: forwarded to ``mechanize.Browser.__init__``.
    :param responses_dirname: stored as ``self.responses_dirname``.
    """
    mechanize.Browser.__init__(self, history=history, factory=factory)
    self.logger = getLogger('browser', logger)
    self.addheaders = [['User-agent', self.USER_AGENT]]

    # Use a proxy
    self.proxy = proxy
    if proxy is not None:
        self.set_proxies(proxy)

    # Share cookies with firefox
    if not (firefox_cookies and HAVE_COOKIES):
        self._cookie = None
    else:
        self._cookie = FirefoxCookieJar(self.DOMAIN, firefox_cookies)
        self._cookie.load()
        self.set_cookiejar(self._cookie)

    # Resolve the parser: default, looked up from a description, or as given.
    if parser is None:
        self.parser = get_parser()()
    elif isinstance(parser, (tuple, list, basestring)):
        self.parser = get_parser(parser)()
    else:
        self.parser = parser

    self.lock = RLock()

    if self.DEBUG_HTTP:
        # display messages from httplib
        self.set_debug_http(True)

    if logging.root.level == logging.DEBUG:
        # Enable log messages from mechanize.Browser
        self.set_debug_redirects(True)
    # NOTE(review): the level of the "mechanize" logger is set here on
    # every construction; confirm against the original layout that this
    # is not meant to happen only when the root logger is at DEBUG.
    logging.getLogger("mechanize").setLevel(logging.INFO)

    self.responses_dirname = responses_dirname
    self.responses_count = 0
def get_document(self, result, parser=None, encoding=None):
    """
    Parse a stream and return the resulting document.

    :param result: HTML page stream
    :type result: stream
    :param parser: parser to use. None selects ``self.parser``; a string,
        list or tuple is handed to ``get_parser()`` and the result is
        instantiated; any other object is used as the parser itself.
    :param encoding: encoding handed to the parser; ``self.ENCODING`` is
        used when it is None.
    :returns: whatever the parser's ``parse(result, encoding)`` returns
    """
    if parser is None:
        chosen = self.parser
    elif isinstance(parser, (basestring, list, tuple)):
        chosen = get_parser(parser)()
    else:
        chosen = parser

    charset = self.ENCODING if encoding is None else encoding
    return chosen.parse(result, charset)