def get_title(self):
    """Return the display title for this object, fetching it on first use.

    If ``self.title`` is already set (anything other than ``None``) it is
    returned untouched.  Otherwise the page at ``self.url`` is opened in a
    worker thread, and on success:

    * ``self.title`` becomes ``"[<site>] <page title>"``, where ``<site>``
      comes from ``format_site_from_url(self.url)`` and the page title is
      passed through ``clean_title`` and ``encoding_sucks``, then lower-cased
      and capitalized;
    * ``self.langue`` receives the result of ``get_langue_from_html`` applied
      to the response body;
    * the object is saved.

    Any failure (timeout, network error, parsing error, ...) is printed and
    replaced by a placeholder title, which is saved as well, so this method
    does not raise for fetch problems.

    Returns:
        The cached, freshly fetched, or placeholder title.
    """
    if self.title is not None:
        return self.title

    site = format_site_from_url(self.url)
    try:
        # Mutable holder shared with the worker thread; a dict is used
        # because this code base targets Python 2, which has no ``nonlocal``.
        outcome = {}

        def process_website(outcome):
            try:
                browser = Browser()
                browser.set_handle_robots(False)
                browser.open(self.url, timeout=9.00)
                # Published last, so a present "browser" key always means
                # the page was opened successfully.
                outcome["browser"] = browser
            except Exception as error:
                # Hand the failure back to the caller instead of letting it
                # die with the thread, so the log below shows the real cause.
                outcome["error"] = error

        thread = threading.Thread(target=process_website, args=(outcome,))
        # A fetch that hangs past the join timeout must not keep the whole
        # process alive at interpreter shutdown.
        thread.daemon = True
        thread.start()
        # Hard upper bound on the wait, slightly above the 9 s passed to
        # ``browser.open``.
        thread.join(timeout=10)

        if "error" in outcome:
            raise outcome["error"]
        if "browser" not in outcome:
            raise Exception("browser timedout or failed")

        browser = outcome["browser"]
        page_title = encoding_sucks(clean_title(browser.title()))
        self.title = "[%s] %s" % (
            site.encode("Utf-8"),
            page_title.lower().capitalize(),
        )
        self.langue = get_langue_from_html(browser.response().get_data())
        self.save()
        return self.title
    except Exception as e:
        # Deliberate best-effort: whatever went wrong, fall back to a
        # placeholder title so callers always get a string back.
        print("Error: fail on %s: %s" % (self.url, e))
        self.title = "[%s] Error: couldn't fetch the title" % site
        self.save()
        return self.title
def get_title(self):
    """Return the display title for this object, fetching it on first use.

    If ``self.title`` is already set (anything other than ``None``) it is
    returned untouched.  Otherwise the page at ``self.url`` is opened and
    the title is built as ``"[<site>] <page title>"``.  ``<site>`` is derived
    from the third ``/``-separated piece of the URL (the host part of a
    ``scheme://host/...`` URL): every ``"www."`` occurrence is removed, the
    last dot-separated component is dropped and the remaining components
    are capitalized.

    The resulting title is stored in ``self.title`` and saved, both on
    success and when the fetch fails with ``URLError`` (in which case a
    placeholder title is used), so later calls hit the cached value
    instead of fetching the page again.

    Returns:
        The cached, freshly fetched, or placeholder title.

    Raises:
        IndexError: if ``self.url`` contains fewer than two ``/``.
    """
    if self.title is not None:
        return self.title

    host = self.url.split("/")[2].replace("www.", "")
    site = ".".join(part.capitalize() for part in host.split(".")[:-1])

    browser = Browser()
    try:
        browser.open(self.url)
        # Store the fetched title as well: previously it was only returned,
        # so the cache check above never hit and every call re-fetched.
        self.title = "[%s] %s" % (
            site.encode("Utf-8"),
            encoding_sucks(browser.title()),
        )
    except URLError:
        self.title = "[%s] Error: couldn't fetch the title" % site
    self.save()
    return self.title