def get_title(self):
    """Return the title for this object's URL, fetching it on first use.

    A title already stored on ``self.title`` is returned unchanged.
    Otherwise the page at ``self.url`` is opened in a worker thread so the
    caller can stop waiting after 10 seconds. On success ``self.title`` and
    ``self.langue`` are filled in and ``self.save()`` is called.

    Errors raised while fetching or formatting the title are caught, reported
    with ``print`` and replaced by a placeholder title. The placeholder is
    stored and saved too, so later calls return it without retrying.

    Returns:
        The cached, freshly fetched, or placeholder title.
    """
    if self.title is not None:
        return self.title

    site = format_site_from_url(self.url)

    fetched = []   # receives the opened browser on success
    failures = []  # receives the exception raised by the worker, if any

    def process_website():
        # Runs in the worker thread. The outcome is handed back through the
        # two lists above because a thread target cannot return a value.
        try:
            browser = Browser()
            browser.set_handle_robots(False)
            browser.open(self.url, timeout=9.00)
        except Exception as error:
            # Keep the real cause so the caller can report it, instead of
            # letting it die with the thread.
            failures.append(error)
        else:
            fetched.append(browser)

    try:
        thread = threading.Thread(target=process_website)
        thread.start()
        # Wait slightly longer than the 9 s timeout given to open().
        thread.join(timeout=10)

        if failures:
            raise failures[0]
        if not fetched:
            # Neither a browser nor an error: the worker is still running.
            raise Exception("browser timedout or failed")

        browser = fetched[0]
        page_title = encoding_sucks(clean_title(browser.title()))
        self.title = "[%s] %s" % (
            site.encode("Utf-8"),
            page_title.lower().capitalize(),
        )
        self.langue = get_langue_from_html(browser.response().get_data())
        self.save()
        return self.title
    except Exception as e:
        # Deliberate best-effort boundary: any failure becomes a placeholder.
        # Parenthesised single argument prints identically on Python 2 and 3.
        print("Error: fail on %s: %s" % (self.url, e))
        self.title = "[%s] Error: couldn't fetch the title" % site
        self.save()
        return self.title
def get_langue(self):
    """Return the language for this object's URL, fetching it on first use.

    A value already stored on ``self.langue`` (including the empty string) is
    returned unchanged. Otherwise the body at ``self.url`` is downloaded and
    passed to ``get_langue_from_html``; the result is stored on
    ``self.langue`` and ``self.save()`` is called.

    If the download raises ``URLError``, the empty string is stored and saved
    instead, so later calls return ``""`` without retrying.

    Returns:
        The cached or freshly detected language, or ``""`` on ``URLError``.
    """
    if self.langue is not None:
        return self.langue

    try:
        response = urlopen(self.url)
        try:
            html = response.read()
        finally:
            # Release the connection even if reading fails part-way.
            response.close()

        lang = get_langue_from_html(html)
        self.langue = lang
        self.save()
        return lang
    except URLError:
        self.langue = ""
        self.save()
        return self.langue