def get_token():
    """OAuth2 redirect handler: exchange the authorization code for a
    Dropbox access token and persist it as the 'default' DropboxToken.

    Returns the raw token-endpoint response (a Struct with at least
    .status and .data) so the caller can inspect the outcome.
    """
    log.info("%s" % dict(request.query))
    code = request.query.get('code', '')
    # CSRF guard on the OAuth state parameter.
    # NOTE(review): comparing the cached value against the literal
    # "session_id" looks suspicious — presumably it should be compared
    # against the caller's actual session id; confirm against the
    # authorize step that sets this memcache key.
    if memcache.get("oauth:%s" % request.query.get('state', '')) != "session_id":
        abort(500, "Invalid state")
    token_url = "https://api.dropbox.com/1/oauth2/token"
    log.info(request.url)
    result = Struct(fetch(token_url, urllib.urlencode({
        "code": code,
        "grant_type": "authorization_code",
        "client_id": settings.dropbox.app_key,
        "client_secret": settings.dropbox.app_secret,
        "redirect_uri": "http://localhost:8080/authorize/confirmation"
    })))
    if result.status == 200:
        try:
            r = Struct(json.loads(result.data))
            r.id = "default"  # single-account app: one well-known token record
            log.info(r)
            t = DropboxToken(**r)
            t.put()
        except Exception:
            # Was a bare `except:` — narrowed so SystemExit/KeyboardInterrupt
            # propagate; the failure is still logged, not re-raised, so the
            # raw response is returned to the caller either way.
            log.error("%s", traceback.format_exc())
    return result
def get_feed(self, feed_name):
    """Fetch and return the raw contents of a configured feed.

    Raises Exception when feed_name is not present in settings.feeds.
    """
    if feed_name not in settings.feeds:
        raise Exception('Invalid feed name')
    return fetch(settings.feeds[feed_name])
def html_fetcher(site):
    """Fetch the favicon the hard way (by retrieving the site's front page)."""
    endpoint = "http://%s" % urlparse.urlparse(site).hostname
    try:
        res = fetch(endpoint)
    except Exception as e:
        log.error("Could not fetch %s: %s" % (endpoint, e))
        return None
def google_fetcher(site):
    """Fetch the favicon via Google's s2/favicons service."""
    endpoint = "http://www.google.com/s2/favicons?domain=%s" % urlparse.urlparse(site).hostname
    try:
        res = fetch(endpoint)
    except Exception as e:
        log.error("could not fetch %s: %s" % (endpoint, e))
        return None
def dumb_fetcher(site):
    """Fetch the favicon the dumb way (the conventional /favicon.ico path)."""
    endpoint = "http://%s/favicon.ico" % urlparse.urlparse(site).hostname
    try:
        res = fetch(endpoint)
    except Exception as e:
        # Fix: the previous code bound `e` but logged tb_info() instead,
        # unlike the sibling fetchers — log the caught exception so all
        # three fetchers report failures the same way.
        log.error("could not fetch %s: %s" % (endpoint, e))
        return None
def get_page(self, page):
    """Return a single page from the cloud store, storing it locally"""
    # Without an OAuth token no Dropbox call can be made.
    if not self.token:
        log.debug("No token")
        return None
    # get the folder contents
    metadata = self.get_metadata(page)
    if not metadata:
        return None
    # Look for the first non-directory entry whose basename is a valid
    # index file (e.g. an index/markup document) inside the page folder.
    markup = None
    for i in metadata['contents']:
        if not i['is_dir']:
            if os.path.basename(i['path']) in self.valid_indexes:
                markup = i['path']
                break
    if not markup:
        return None
    # Build the Dropbox file-download URL with the bearer token as a
    # query parameter.
    get_url = _urls.files % (markup, urllib.urlencode({"access_token": self.token}))
    log.debug(get_url)
    r = fetch(get_url)
    if r['status'] == 200:
        # Dropbox returns file metadata in the x-dropbox-metadata header.
        metadata = json.loads(r['x-dropbox-metadata'])
        # The page body is stored as an RFC822-style message:
        # headers (title, tags, created, ...) then the body text.
        try:
            headers, body, mime_type = parse_rfc822(r['data'])
        except Exception as e:
            log.error("Could not parse %s: %s" % (page, e))
            return None
        # mtime is taken from cloud store metadata
        mtime = parse_dropbox_date(metadata['modified'])
        # ctime comes from the page's own "created" header, when present.
        ctime = headers.get('created', None)
        if ctime:
            ctime = datetime.fromtimestamp(parse_date(ctime))
        # Pages are keyed case-insensitively by path.
        id = page.lower()
        params = {
            "id"       : id,
            "path"     : page,
            "ctime"    : ctime,
            "mtime"    : mtime,
            "title"    : headers.get('title', 'Untitled'),
            "tags"     : headers.get('tags', None),
            "body"     : body,
            "mime_type": mime_type,
            "headers"  : headers
        }
        # Persist locally and cache the page headers for quick lookups.
        p = Page(**params)
        p.put()
        memcache.set(params['id'], params['headers'], namespace=NS_PAGE_METADATA)
        return p
    # Non-200 (including 304/404) yields no page.
    return None
def scan_subtree(self, path):
    """Search the cloud store subtree at path for index files.

    Returns a list of transformed (non-directory) search results, or
    None when no token is available or the search request fails.
    """
    if not self.token:
        log.debug("No token")
        return None
    query_params = {"access_token": self.token, "query": "index"}
    subtree = os.path.normpath(os.path.join(settings.dropbox.root_path, path))
    search_url = _urls.search % (subtree, urllib.urlencode(query_params))
    log.warn(search_url)
    r = fetch(search_url)
    if r['status'] != 200:
        return None
    # Directories are skipped; only file entries are transformed.
    return [transform_entry(entry) for entry in json.loads(r["data"]) if not entry["is_dir"]]
def get_attachment(self, page, filename):
    """Return an attachment from the cloud store, storing it locally"""
    # Without an OAuth token no Dropbox call can be made.
    if not self.token:
        log.debug("No token")
        return None
    # get the folder contents
    metadata = self.get_metadata(page)
    if not metadata:
        return None
    # Find the non-directory entry whose basename matches the requested
    # attachment filename.
    target = None
    for i in metadata['contents']:
        if not i['is_dir']:
            if os.path.basename(i['path']) == filename:
                target = i['path']
                break
    if not target:
        return None
    # Build the Dropbox file-download URL with the bearer token as a
    # query parameter.
    get_url = _urls.files % (target, urllib.urlencode({"access_token": self.token}))
    log.debug(get_url)
    r = fetch(get_url)
    if r['status'] == 200:
        # Dropbox returns file metadata in the x-dropbox-metadata header.
        metadata = json.loads(r['x-dropbox-metadata'])
        # mtime is taken from cloud store metadata
        mtime = parse_dropbox_date(metadata['modified'])
        # Attachments are keyed case-insensitively by page path + filename.
        id = os.path.join(page.lower(), filename.lower())
        params = {
            "id"       : id,
            "path"     : page,
            "filename" : filename,
            "mtime"    : mtime,
            "data"     : r['data'],
            "mime_type": metadata['mime_type'],
        }
        # Persist the attachment locally before returning it.
        a = Attachment(**params)
        a.put()
        return a
    # Non-200 (including 304/404) yields no attachment.
    return None
def get_metadata(self, path):
    """Fetch folder metadata for path from the cloud store.

    Uses the memcached copy's hash for a conditional request; on any
    non-200 response (304 unchanged, 404, errors) the cached metadata
    (possibly None) is returned unchanged.
    """
    if not self.token:
        log.debug("No token")
        return None
    cached = memcache.get(path, namespace=NS_CLOUD_METADATA)
    request_params = {"access_token": self.token}
    if cached:
        # Dropbox returns 304 when the folder hash is unchanged.
        request_params["hash"] = cached["hash"]
    target = os.path.normpath(os.path.join(settings.dropbox.root_path, path))
    metadata_url = _urls.metadata % (target, urllib.urlencode(request_params))
    log.debug(metadata_url)
    r = fetch(metadata_url)
    if r['status'] != 200:
        # also valid for 304/404 return values
        return cached
    fresh = json.loads(r["data"])
    memcache.set(path, fresh, namespace=NS_CLOUD_METADATA)
    return fresh
def fetch_feed(self, feed):
    """Fetch one feed, honoring its TTL and the global polling interval."""
    if not feed.enabled:
        return
    (schema, netloc, path, params, query, fragment) = urlparse.urlparse(feed.url)
    now = time.time()
    # Throttling: skip the fetch when we polled recently, per the feed's
    # advertised TTL (minutes) or our own minimum polling interval.
    if feed.last_checked:
        if feed.ttl:
            if (now - feed.last_checked) < (feed.ttl * 60):
                log.info("TTL %s" % netloc)
                return
        if (now - feed.last_checked) < settings.fetcher.min_interval:
            log.info("INTERVAL %s" % netloc)
            return
    if feed.last_modified:
        if (now - feed.last_modified) < settings.fetcher.min_interval:
            log.info("LAST_MODIFIED %s" % netloc)
            return
        # Format the stored timestamp for an If-Modified-Since request.
        modified = http_time(feed.last_modified)
    else:
        modified = None
    try:
        # Conditional fetch: etag / last-modified let the server reply 304.
        response = fetch(feed.url, etag=feed.etag, last_modified=modified, timeout=settings.fetcher.fetch_timeout)
        log.debug("%s - %d, %d" % (feed.url, response['status'], len(response['data'])))
    except Exception, e:
        log.error("Could not fetch %s: %s" % (feed.url, e))
        # TODO: store reason properly
        # Network connect timeout error (Unknown)
        # 599 is the conventional "network connect timeout" pseudo-status.
        self.after_fetch(feed, status=599, error=True)
        return
# NOTE(review): this appears to be a near-identical duplicate of an earlier
# fetch_feed definition (only keyword-argument spacing differs) — likely a
# chunking/merge artifact; confirm whether one copy should be removed.
def fetch_feed(self, feed):
    """Fetch one feed, honoring its TTL and the global polling interval."""
    if not feed.enabled:
        return
    (schema, netloc, path, params, query, fragment) = urlparse.urlparse(feed.url)
    now = time.time()
    # Throttling: skip the fetch when we polled recently, per the feed's
    # advertised TTL (minutes) or our own minimum polling interval.
    if feed.last_checked:
        if feed.ttl:
            if (now - feed.last_checked) < (feed.ttl * 60):
                log.info("TTL %s" % netloc)
                return
        if (now - feed.last_checked) < settings.fetcher.min_interval:
            log.info("INTERVAL %s" % netloc)
            return
    if feed.last_modified:
        if (now - feed.last_modified) < settings.fetcher.min_interval:
            log.info("LAST_MODIFIED %s" % netloc)
            return
        # Format the stored timestamp for an If-Modified-Since request.
        modified = http_time(feed.last_modified)
    else:
        modified = None
    try:
        # Conditional fetch: etag / last-modified let the server reply 304.
        response = fetch(feed.url, etag = feed.etag, last_modified = modified, timeout=settings.fetcher.fetch_timeout)
        log.debug("%s - %d, %d" % (feed.url, response['status'], len(response['data'])))
    except Exception, e:
        log.error("Could not fetch %s: %s" % (feed.url, e))
        # TODO: store reason properly
        # Network connect timeout error (Unknown)
        # 599 is the conventional "network connect timeout" pseudo-status.
        self.after_fetch(feed, status = 599, error = True)
        return
# NOTE(review): the lines below are a mid-function fragment — the tail of a
# favicon fetcher (its `def` and opening `try:` are not visible in this
# chunk). Left untouched apart from comments; it parses the fetched page
# with BeautifulSoup, follows the <link rel="shortcut icon"> href, and
# returns the icon as a data URI.
    except Exception, e:
        log.error("Could not fetch %s: %s" % (endpoint, e))
        return None
    try:
        soup = BeautifulSoup(res['data'])
    except Exception, e:
        log.error("Could not parse %s: %s" % (endpoint, e))
        return None
    # Locate the declared favicon link in the page head.
    link = soup.find("link", rel="shortcut icon")
    if not link:
        return None
    url = link['href']
    try:
        res = fetch(url)
    except Exception, e:
        log.error("could not fetch %s: %s" % (endpoint, e))
        return None
    # Inline the icon bytes as a data: URI with the served content type.
    return data_uri(res['content-type'], res['data'])

def fetch_anyway(site):
    """Try each favicon strategy in turn and return the first hit.

    Falls back to the module-level _default icon when every fetcher
    returns a falsy result.
    """
    global _default
    data = None
    # Cheapest strategies first: Google service, /favicon.ico, HTML scrape.
    for handler in [google_fetcher, dumb_fetcher, html_fetcher]:
        data = handler(site)
        if data:
            return data
    return _default