def pruneProducers(self, socgr, gumap, pddb, cutoff=1):
	"""
	Removes producers with less than the given number of photos.

	@param socgr: graph of users (currently untouched; see commented code below)
	@param gumap: group container; pruned groups are removed from it in place
	@param pddb: an open database of {producer:[photo]}, pruned in place
	@param cutoff: producers with this many photos or less will be pruned
	       (default 1)
	"""
	# TODO NORM maybe also prune groups with >n users
	#FIXME HIGH if we prune users, then we also need to prune groups that
	#point to this user
	pruned_users = []
	#for u in socgr.vs[NID]:
	#	if u in pddb:
	#		if len(pddb[u]) > cutoff:
	#			continue
	#		del pddb[u]
	#		pruned_users.append(u)
	# Collect first, then delete, so we never mutate gumap mid-iteration.
	pruned_groups = [g for g in gumap if g in pddb and len(pddb[g]) <= cutoff]
	for g in pruned_groups:
		del pddb[g]
	#socgr.delete_vertices([v.index for v in socgr.vs.select(id_in=set(delu))])
	for g in pruned_groups:
		del gumap[g]
	LOG.info("producer db: pruned %s users, %s groups" % (len(pruned_users), len(pruned_groups)))
def main():
	"""Demo driver: wire Twisted logging into stdlib logging, run one sample
	Flickr API call through the reactor, then shut the reactor down."""
	from twisted.internet import reactor
	from twisted.python import log
	import logging
	# Route Twisted's log events into the stdlib logging tree, then crank
	# both the root logger and the module logger up to DEBUG.
	log.PythonLoggingObserver().start()
	logging.getLogger().setLevel(level=logging.DEBUG)
	LOG.setLevel(level=logging.DEBUG)
	# NOTE(review): placeholder credentials -- must be replaced with a real
	# key/secret pair before this demo can talk to Flickr.
	api_key = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
	api_secret = "XXXXXXXXXXXX"
	flickr = TwistedFlickrAPI(api_key, api_secret)
	#flickr.authenticate_console("write"
	#		).addCallback(log.msg, "<- got token"
	#		).addBoth(lambda _: reactor.callLater(0, reactor.stop)
	#		)
	#flickr.upload("test.jpg", is_public="0"
	#		).addBoth(log.msg
	#		).addBoth(lambda _: reactor.callLater(0, reactor.stop)
	#		)
	# Fire one sample search; log whatever comes back (result or failure)
	# and schedule a reactor stop either way.
	flickr.photos_search(user_id='73509078@N00', per_page='10'
			).addBoth(log.msg
			).addBoth(lambda _: reactor.callLater(0, reactor.stop)
			)
	reactor.run()
def _FlickrAPI__send_multipart(self, url, body, progress_callback=None):
	"""POST a pre-built multipart *body* to *url* through twisted.web's
	getPage and return the resulting deferred.

	Progress callbacks are not implemented; passing one trips the assert.
	"""
	assert not progress_callback, \
		"twisted upload/replace does not support progress callbacks yet"
	# A progress-capable implementation would look like
	# http://twistedmatrix.com/pipermail/twisted-web/2007-January/003253.html
	LOG.debug("Uploading to %s" % url)
	headers = dict([body.header()])
	return getPage(url, method="POST", postdata=str(body), headers=headers)
def need_new(token):
	"""If *token* is usable, pass it straight through as (token, None);
	otherwise fetch a frob, start console validation, and defer the
	(token, frob) pair down the chain."""
	if token:
		return token, None  # good token, no new frob
	LOG.debug("Getting frob for new token")
	rsp = self.auth_getFrob(auth_token=None, format='xmlnode')
	def valid_frob(rsp):
		# Extract the frob and kick off user-facing validation for *perms*.
		frob = rsp.frob[0].text
		self.validate_frob(frob, perms)
		return token, frob
	rsp.addCallback(valid_frob)
	return rsp  # automatic deferred chaining
def _FlickrAPI__wrap_in_parser(self, wrapped_method, parse_format, *args, **kwargs):
	"""Call *wrapped_method*; when a parser is registered for
	*parse_format*, request the raw REST form from Flickr and hook the
	parser onto the returned deferred."""
	if parse_format in rest_parsers and 'format' in kwargs:
		# We will parse the reply ourselves, so ask the server for REST.
		kwargs['format'] = 'rest'
	LOG.debug('Wrapping call %s(self, %s, %s)' % (wrapped_method, args, kwargs))
	data = wrapped_method(*args, **kwargs)
	if parse_format not in rest_parsers:
		# No parser known for this format: hand the raw deferred back.
		return data
	parse = rest_parsers[parse_format]
	return data.addCallback(lambda resp: parse(self, resp))
def _FlickrAPI__flickr_call(self, **kwargs):
	"""Perform one asynchronous Flickr REST call.

	Encodes and signs *kwargs*, serves the reply from the cache when one
	is present, otherwise POSTs to the REST endpoint and (if caching is
	enabled) stores the reply on the way back.

	@return: a Deferred firing with the raw reply body
	"""
	LOG.debug("Calling %s" % kwargs)
	post_data = self.encode_and_sign(kwargs)
	# Single cache lookup: the original fetched the entry twice (once for
	# the truth test, once for the return), doing double work and racing
	# against eviction between the two calls.
	if self.cache:
		cached = self.cache.get(post_data)
		if cached:
			return defer.succeed(cached)
	url = "http://" + FlickrAPI.flickr_host + FlickrAPI.flickr_rest_form
	reply = getPage(url, method="POST", postdata=post_data,
			headers={"Content-Type": "application/x-www-form-urlencoded"})
	if self.cache is not None:
		reply.addCallback(self._add_to_cache, post_data)
	return reply
def getSetPhotos(self, sets, x):
	"""
	Gets sets of a given user and all photos belonging to it

	@param sets: an iterable of set ids
	@param x: an executor to execute calls in parallel
	@return: {set:[photo]}
	"""
	spmap = {}
	#[s.get(NID) for s in self.photosets_getList(user_id=nsid).getchildren()[0].getchildren()]
	calls = (partial(self.photosets_getPhotos, photoset_id=sid) for sid in sets)
	for reply in x.run_to_results_any(calls):
		pset = reply.getchildren()[0]
		sid = pset.get(NID)
		spmap[sid] = [photo.get(NID) for photo in pset.getchildren()]
		# NOTE(review): the trailing 6 looks like a verbosity level for a
		# project-local logger -- stdlib logging.debug would reject it; confirm.
		LOG.debug("set: got %s photos (%s)" % (len(pset), sid), 6)
	return spmap
def commitUserPhotos(self, users, pddb):
	"""
	Gets the photos of the given users and saves these to a database

	@param users: a list of user ids
	@param pddb: an open database of {producer:[photo]}
	"""
	# Convert to a set for O(1) membership tests in run()'s faves filter,
	# but only when the collection is big enough for conversion to pay off.
	if type(users) != set and len(users) > 16: users = set(users) # efficient membership test
	def run(nsid):
		# Fetch the user's own public stream plus their public favourites
		# that point back at someone in *users*.
		# OPT HIGH decide whether we want this many, or whether "faves" only will do
		stream = list(self.data_walker(self.people_getPublicPhotos, user_id=nsid, per_page=500))
		faves = list(p for p in self.data_walker(self.favorites_getPublicList, user_id=nsid, per_page=500) if p.get("owner") in users)
		return stream, faves
	# NOTE: Python 2 tuple-unpacking parameter -- (stream, faves) is run()'s
	# result for the same nsid; i is presumably a progress index -- confirm.
	def post(nsid, i, (stream, faves)):
		photos = [p.get(NID) for p in chain(stream, faves)]
		# NOTE(review): 4096 looks like a soft cap worth flagging in the
		# log, not a hard limit -- confirm against the API page size.
		if len(photos) >= 4096:
			LOG.info("producer db (user): got %s photos for user %s" % (len(photos), nsid))
		pddb[nsid] = photos
def check(token):
	"""Probe a cached token against the API: defer the token back if it
	still grants *perms*, or None when a fresh token must be obtained."""
	if not token:
		return None  # need new one
	LOG.debug("Trying cached token '%s'" % token)
	rsp = self.auth_checkToken(auth_token=token, format='xmlnode')
	def check_get(rsp):
		granted = rsp.auth[0].perms[0].text
		# The token must grant at least the permission level requested.
		if granted == "read" and perms != "read":
			return None  # need new
		if granted == "write" and perms == "delete":
			return None  # need new
		return token  # is good
	def check_err(err):
		# Flickr rejected the token: drop it from the cache and signal
		# that a new one is required. Other failures propagate.
		err.trap(FlickrError)
		LOG.debug("Cached token invalid")
		self.token_cache.forget()
		return None  # need new
	rsp.addCallback(check_get)
	rsp.addErrback(check_err)
	return rsp  # automatic deferred chaining
def __flickr_call(self, **kwargs):
	"""Perform one blocking Flickr REST call over a thread-local
	persistent HTTP connection, serving from the cache when possible."""
	# Use persistent HTTP connections through a thread-local socket
	from flickrapi import LOG
	LOG.debug("Calling %s" % kwargs)
	post_data = self.encode_and_sign(kwargs)
	# Return value from cache if available
	if self.cache and self.cache.get(post_data):
		return self.cache.get(post_data)
	# Thread-local persistent connection
	try:
		# Lazily open one HTTPConnection per thread and reuse it across calls.
		if "conn" not in self.thr.__dict__:
			self.thr.conn = HTTPConnection(FlickrAPI.flickr_host)
			LOG.debug("connection opened: %s" % FlickrAPI.flickr_host)
		self.thr.conn.request("POST", FlickrAPI.flickr_rest_form, post_data,
			{"Content-Type": "application/x-www-form-urlencoded"})
		reply = self.thr.conn.getresponse().read()
	# Python 2 except syntax. On a stale/broken connection, discard it so
	# the next call reopens a fresh one, then propagate the error.
	except (ImproperConnectionState, socket.error), e:
		LOG.debug("connection error: %s" % repr(e))
		self.thr.conn.close()
		del self.thr.conn
		raise
def scrapeIDs(self, seed, size):
	"""
	Breadth-first sample of ids starting from *seed*.

	@param seed: an identifier resolvable by self.getNSID
	@param size: number of nodes to collect
	@return: the built NodeSample
	@raise TypeError: if size is not an integer
	"""
	# isinstance is the idiomatic check (type(x) != int also rejected
	# int subclasses); the interface -- TypeError on bad input -- is kept.
	if not isinstance(size, int):
		raise TypeError
	# Renamed from next()/id(): the original shadowed two builtins.
	def advance(sample, queue):
		# Pop the next candidate; add it and enqueue its out-neighbours.
		# Returns the id if it was newly added, None if already sampled.
		nsid = queue.popleft()
		if nsid in sample:
			return None
		node = self.makeID(nsid)
		queue.extend(node.out.keys())
		sample.add_node(node)
		return nsid
	sample = NodeSample()
	queue = deque([self.getNSID(seed)])
	while len(sample) < size:
		added = advance(sample, queue)
		if added is not None:
			LOG.info("id sample: %s/%s (added %s)" % (len(sample), size, added))
	sample.build()
	return sample
def check_err(err):
	"""Errback: a FlickrError means the cached token was rejected --
	forget it and signal (via None) that a new token is needed.
	Any other failure is re-raised by trap()."""
	err.trap(FlickrError)
	LOG.debug("Cached token invalid")
	self.token_cache.forget()
	return None  # need new
def extract_token(rsp):
	"""Pull the token string out of an auth response, persist it in the
	token cache for the next run, and pass it down the callback chain."""
	token = rsp.auth[0].token[0].text
	LOG.debug("get_token: new token '%s'" % token)
	# store the auth info for next time
	self.token_cache.token = token
	return token
def post(gid, i, photos):
	"""Store the photo ids of group *gid* into the producer database.
	NOTE(review): the 4096 log threshold presumably flags hitting a
	result cap -- confirm against the API page size."""
	ids = [p.get(NID) for p in photos]
	if len(ids) >= 4096:
		LOG.info("producer db (group): got %s photos for group %s" % (len(ids), gid))
	pddb[gid] = ids
""" if vkdb.writeback is not True: raise ValueError("[vkdb] must have writeback=True") def syncer(i, (key, items)): vkdb.sync() for i, (key, items) in enumerate_cb(kvdb.iteritems(), syncer, every=0x10000): for item in items: if item in vkdb: vkdb[item].append(key) else: vkdb[item] = [key] vkdb.sync() LOG.info("%s db: inverted %s keys to %s items" % (name, len(kvdb), len(vkdb))) def commitTagClusters(self, tags, tcdb): """ Gets the clusters of all the given tags and saves these to a database @param tags: a list of tags @param tcdb: an open database of {tag:[cluster]} """ def run(tag): try: # FIXME HIGH verify that this does the right thing for unicode tags # atm all evidence points to flickr not doing clustering anaylses for them... clusters = self.tags_getClusters(tag=tag).getchildren()[0].getchildren() except FlickrError, e: