Пример #1
0
	def pruneProducers(self, socgr, gumap, pddb, cutoff=1):
		"""
		Prunes groups that have too few photos in the producer database.

		A group in [gumap] is kept only if it has an entry in [pddb] holding more
		than [cutoff] photos. Every other group is removed from [gumap], and its
		[pddb] entry (if there is one) is deleted. User pruning is currently
		disabled, so [socgr] is left untouched.

		@param socgr: graph of users (unused while user pruning is disabled)
		@param gumap: mapping whose keys are groups; pruned in place
		@param pddb: an open database of {producer:[photo]}; pruned in place
		@param cutoff: producers with this many photos or less will be pruned
		       (default 1)
		"""
		# TODO NORM maybe also prune groups with >n users

		#FIXME HIGH if we prune users, then we also need to prune groups that
		#point to this user
		# User pruning is switched off; the list stays empty and only feeds the
		# count in the log line below.
		pruned_users = []
		#for u in socgr.vs[NID]:
		#	if u in pddb:
		#		if len(pddb[u]) > cutoff:
		#			continue
		#		del pddb[u]
		#	pruned_users.append(u)

		# Collect first, delete afterwards: gumap must not change size while it
		# is being iterated.
		pruned_groups = []
		for group in gumap:
			has_entry = group in pddb
			if has_entry and len(pddb[group]) > cutoff:
				continue
			if has_entry:
				del pddb[group]
			pruned_groups.append(group)

		#socgr.delete_vertices([v.index for v in socgr.vs.select(id_in=set(pruned_users))])
		for group in pruned_groups:
			del gumap[group]

		LOG.info("producer db: pruned %s users, %s groups" % (len(pruned_users), len(pruned_groups)))
Пример #2
0
def main():
    """
    Manual demo: runs one photo search against Flickr and logs the outcome.

    Twisted log events are forwarded to the standard logging module, debug
    output is enabled, a single photos_search call is issued, and the reactor
    is stopped once that call has either succeeded or failed.
    """
    from twisted.internet import reactor
    from twisted.python import log
    import logging

    # Forward Twisted's log events to stdlib logging and show everything.
    observer = log.PythonLoggingObserver()
    observer.start()
    root_logger = logging.getLogger()
    root_logger.setLevel(level=logging.DEBUG)
    LOG.setLevel(level=logging.DEBUG)

    api_key = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
    api_secret = "XXXXXXXXXXXX"
    flickr = TwistedFlickrAPI(api_key, api_secret)

    def stop_reactor(_):
        # Scheduled rather than called directly so that the current callback
        # chain finishes before the reactor shuts down.
        return reactor.callLater(0, reactor.stop)

    # Alternative demos, kept for reference:
    #
    #flickr.authenticate_console("write"
    #    ).addCallback(log.msg, "<- got token"
    #    ).addBoth(lambda _: reactor.callLater(0, reactor.stop)
    #    )
    #
    #flickr.upload("test.jpg", is_public="0"
    #    ).addBoth(log.msg
    #    ).addBoth(lambda _: reactor.callLater(0, reactor.stop)
    #    )

    search = flickr.photos_search(user_id='73509078@N00', per_page='10')
    search.addBoth(log.msg)
    search.addBoth(stop_reactor)

    reactor.run()
Пример #3
0
 def _FlickrAPI__send_multipart(self, url, body, progress_callback=None):
     """
     POSTs a multipart body to the given URL and returns getPage()'s result.

     @param url: the URL to post to
     @param body: the multipart body; str(body) is sent as the payload and
            body.header() supplies the single request header
     @param progress_callback: not supported; must be left unset
     @return: whatever getPage() returns for the request
     """
     assert not progress_callback, \
         "twisted upload/replace does not support progress callbacks yet"
     # Progress reporting would look something like
     # http://twistedmatrix.com/pipermail/twisted-web/2007-January/003253.html
     LOG.debug("Uploading to %s" % url)
     payload = str(body)
     request_headers = dict([body.header()])
     return getPage(url, method="POST", postdata=payload,
             headers=request_headers)
Пример #4
0
 def need_new(token):
     """
     Decides whether a new frob has to be requested for the given token.

     A truthy token is passed straight through as (token, None). Otherwise a
     frob is requested with auth_getFrob and validated with validate_frob,
     and the returned deferred fires with (token, frob).
     """
     if not token:
         LOG.debug("Getting frob for new token")
         pending = self.auth_getFrob(auth_token=None, format='xmlnode')

         def valid_frob(rsp):
             frob = rsp.frob[0].text
             self.validate_frob(frob, perms)
             return token, frob

         pending.addCallback(valid_frob)
         return pending # automatic deferred chaining
     return token, None # good token, no new frob
Пример #5
0
 def _FlickrAPI__wrap_in_parser(self, wrapped_method,
         parse_format, *args, **kwargs):
     """
     Calls wrapped_method and, for known formats, parses its eventual result.

     If parse_format has an entry in rest_parsers, any 'format' keyword is
     forced to 'rest' before the call, and the matching parser is attached as
     a callback to the object the call returns. For any other parse_format
     the call's result is returned untouched.
     """
     if 'format' in kwargs and parse_format in rest_parsers:
         kwargs['format'] = 'rest'
     LOG.debug('Wrapping call %s(self, %s, %s)' % (wrapped_method, args,
         kwargs))
     data = wrapped_method(*args, **kwargs)
     if parse_format in rest_parsers:
         parser = rest_parsers[parse_format]

         def parse(resp):
             return parser(self, resp)

         return data.addCallback(parse)
     return data
Пример #6
0
 def _FlickrAPI__flickr_call(self, **kwargs):
     """
     Performs a Flickr REST call, answering from the cache when possible.

     The keyword arguments are encoded and signed with encode_and_sign(); the
     resulting POST data doubles as the cache key.

     @param kwargs: the call's arguments
     @return: on a cache hit, an already-fired deferred (defer.succeed)
             holding the cached value; otherwise whatever getPage() returns
             for the POST request. When a cache object is set,
             _add_to_cache is attached as a callback with the POST data as
             its extra argument.
     """
     LOG.debug("Calling %s" % kwargs)
     post_data = self.encode_and_sign(kwargs)

     # Query the cache once and reuse the value: asking twice (once to test,
     # once to return) lets the entry change between the two calls.
     if self.cache:
         cached = self.cache.get(post_data)
         if cached:
             return defer.succeed(cached)

     url = "http://" + FlickrAPI.flickr_host + FlickrAPI.flickr_rest_form
     reply = getPage(url, method="POST", postdata=post_data, headers={
         "Content-Type": "application/x-www-form-urlencoded"})
     # "is not None" on purpose: a cache object that is currently falsy was
     # skipped for the lookup above but must still receive the new reply.
     if self.cache is not None:
         reply.addCallback(self._add_to_cache, post_data)
     return reply
Пример #7
0
	def getSetPhotos(self, sets, x):
		"""
		Fetches the photos belonging to each of the given photosets.

		One photosets_getPhotos call is prepared per set id and handed to the
		executor; results are processed in whatever order the executor yields
		them.

		@param sets: an iterable of set ids
		@param x: an executor to execute calls in parallel
		@return: {set:[photo]}
		"""
		#[s.get(NID) for s in self.photosets_getList(user_id=nsid).getchildren()[0].getchildren()]
		calls = (partial(self.photosets_getPhotos, photoset_id=sid) for sid in sets)

		spmap = {}
		for rsp in x.run_to_results_any(calls):
			# the set id is read back from the response itself
			pset = rsp.getchildren()[0]
			sid = pset.get(NID)
			photos = [p.get(NID) for p in pset.getchildren()]
			spmap[sid] = photos
			LOG.debug("set: got %s photos (%s)" % (len(pset), sid), 6)

		return spmap
Пример #8
0
	def commitUserPhotos(self, users, pddb):
		"""
		Gets the photos of the given users and saves these to a database

		Only the two helper closures are visible in this excerpt; the code that
		drives them (presumably an executor running [run] per user and feeding
		each result to [post] - TODO confirm) is not shown here.

		@param users: a list of user ids
		@param pddb: an open database of {producer:[photo]}
		"""
		# Collections of more than 16 ids are converted to a set, since run()
		# tests "owner in users" once per favourite photo.
		if type(users) != set and len(users) > 16: users = set(users) # efficient membership test
		def run(nsid):
			"""
			Fetches the photos for one user.

			@param nsid: the user id to fetch photos for
			@return: (stream, faves) - the user's public photos, and those of
			         the user's public favourites whose "owner" is in [users]
			"""
			# OPT HIGH decide whether we want this many, or whether "faves" only will do
			stream = list(self.data_walker(self.people_getPublicPhotos, user_id=nsid, per_page=500))
			faves = list(p for p in self.data_walker(self.favorites_getPublicList, user_id=nsid, per_page=500) if p.get("owner") in users)
			return stream, faves

		# NOTE: the tuple parameter below is Python 2-only syntax.
		def post(nsid, i, (stream, faves)):
			"""
			Stores the photo ids of one user in [pddb].

			@param nsid: the user id the photos belong to
			@param i: not used in this function
			@param (stream, faves): the pair returned by run()
			"""
			# stream ids first, then favourites; duplicates are not removed
			photos = [p.get(NID) for p in chain(stream, faves)]
			# only unusually large results are worth a log line
			if len(photos) >= 4096:
				LOG.info("producer db (user): got %s photos for user %s" % (len(photos), nsid))
			pddb[nsid] = photos
Пример #9
0
 def check(token):
     """
     Verifies that a cached token is valid and grants enough permissions.

     A falsy token yields None immediately. Otherwise the token is checked
     with auth_checkToken and the returned deferred fires with the token if
     it is good, or with None if a new one is needed (permissions too weak,
     or the check failed with a FlickrError, in which case the token cache is
     also forgotten).
     """
     if not token:
         return None # need new one
     LOG.debug("Trying cached token '%s'" % token)
     pending = self.auth_checkToken(auth_token=token, format='xmlnode')

     def check_get(rsp):
         granted = rsp.auth[0].perms[0].text
         too_weak = (
             (granted == "read" and perms != "read")
             or (granted == "write" and perms == "delete")
         )
         if too_weak:
             return None # need new
         return token # is good

     def check_err(err):
         # any failure other than FlickrError is re-raised by trap()
         err.trap(FlickrError)
         LOG.debug("Cached token invalid")
         self.token_cache.forget()
         return None # need new

     # Order matters: the errback is added after the callback, so it also
     # sees failures raised inside check_get.
     pending.addCallback(check_get)
     pending.addErrback(check_err)
     return pending # automatic deferred chaining
Пример #10
0
	def __flickr_call(self, **kwargs):
		"""
		Performs a Flickr REST call over a connection stored on [self.thr].

		Only the part up to the error handler is visible in this excerpt; what
		happens to [reply] on success is not shown here.

		@param kwargs: the call's arguments; encoded and signed into POST data
		@return: the cached value on a cache hit (rest not visible here)
		@raise ImproperConnectionState, socket.error: re-raised after the
		       stored connection has been closed and discarded
		"""
		# Use persistent HTTP connections through a thread-local socket
		from flickrapi import LOG

		LOG.debug("Calling %s" % kwargs)

		# the signed POST data doubles as the cache key
		post_data = self.encode_and_sign(kwargs)

		# Return value from cache if available
		if self.cache and self.cache.get(post_data):
			return self.cache.get(post_data)

		# Thread-local persistent connection
		# (presumably self.thr is a threading.local - TODO confirm)
		try:
			# open the connection lazily, on first use
			if "conn" not in self.thr.__dict__:
				self.thr.conn = HTTPConnection(FlickrAPI.flickr_host)
				LOG.debug("connection opened: %s" % FlickrAPI.flickr_host)

			self.thr.conn.request("POST", FlickrAPI.flickr_rest_form, post_data,
				{"Content-Type": "application/x-www-form-urlencoded"})
			reply = self.thr.conn.getresponse().read()

		except (ImproperConnectionState, socket.error), e:
			# Drop the stored connection so that the next call opens a fresh
			# one, then let the caller see the error.
			LOG.debug("connection error: %s" % repr(e))
			self.thr.conn.close()
			del self.thr.conn
			raise
Пример #11
0
	def scrapeIDs(self, seed, size):
		"""
		Collects a sample of ids by walking outwards from a seed.

		Ids are taken off a FIFO queue one at a time, starting with the id that
		getNSID() returns for the seed. Each id not yet in the sample is turned
		into a node with makeID() and added to the sample, and the keys of the
		node's [out] mapping are queued for later visits.

		@param seed: passed to getNSID() to obtain the starting id
		@param size: number of nodes to collect; must be an int
		@return: the NodeSample, after build() has been called on it; it holds
		         fewer than [size] nodes if the walk ran out of ids
		@raise TypeError: if [size] is not an int
		"""
		if type(size) != int:
			raise TypeError("size must be an int, not %s" % type(size).__name__)

		def visit(sample, queue):
			# returns the id that was added, or None if it was already sampled
			nid = queue.popleft()
			if nid in sample: return None
			node = self.makeID(nid)
			queue.extend(node.out.keys())
			sample.add_node(node)
			return nid

		s = NodeSample()
		q = deque([self.getNSID(seed)])

		# Also stop when the queue runs dry: popleft() on an empty deque raises
		# IndexError, which used to abort the scrape whenever fewer than [size]
		# ids were reachable from the seed.
		while len(s) < size and q:
			nid = visit(s, q)
			if nid is not None:
				LOG.info("id sample: %s/%s (added %s)" % (len(s), size, nid))

		if len(s) < size:
			LOG.info("id sample: ran out of ids at %s/%s" % (len(s), size))

		s.build()
		return s
Пример #12
0
 def check_err(err):
     """
     Errback for a failed token check: forgets the cached token.

     Failures other than FlickrError are re-raised by trap(). For a
     FlickrError the token cache is cleared and None is returned, meaning a
     new token is needed.
     """
     err.trap(FlickrError)
     LOG.debug("Cached token invalid")
     cache = self.token_cache
     cache.forget()
     return None # need new
Пример #13
0
 def extract_token(rsp):
     """
     Pulls the auth token out of a response, caches it, and returns it.

     The token text is read from rsp.auth[0].token[0] and stored on
     self.token_cache for next time.
     """
     new_token = rsp.auth[0].token[0].text
     LOG.debug("get_token: new token '%s'" % new_token)
     # remember the auth info so that the next run can reuse it
     self.token_cache.token = new_token
     return new_token
Пример #14
0
		def post(gid, i, photos):
			"""
			Stores the ids of a group's photos in [pddb] under the group id.

			@param gid: the group id the photos belong to
			@param i: not used in this function
			@param photos: the photos; must support len() and iteration
			"""
			count = len(photos)
			# only unusually large results are worth a log line
			if count >= 4096:
				LOG.info("producer db (group): got %s photos for group %s" % (count, gid))
			photo_ids = [p.get(NID) for p in photos]
			pddb[gid] = photo_ids
Пример #15
0
		"""
		if vkdb.writeback is not True:
			raise ValueError("[vkdb] must have writeback=True")

		def syncer(i, (key, items)):
			vkdb.sync()

		for i, (key, items) in enumerate_cb(kvdb.iteritems(), syncer, every=0x10000):
			for item in items:
				if item in vkdb:
					vkdb[item].append(key)
				else:
					vkdb[item] = [key]
		vkdb.sync()

		LOG.info("%s db: inverted %s keys to %s items" % (name, len(kvdb), len(vkdb)))


	def commitTagClusters(self, tags, tcdb):
		"""
		Gets the clusters of all the given tags and saves these to a database

		@param tags: a list of tags
		@param tcdb: an open database of {tag:[cluster]}
		"""
		def run(tag):
			try:
				# FIXME HIGH verify that this does the right thing for unicode tags
				# atm all evidence points to flickr not doing clustering anaylses for them...
				clusters = self.tags_getClusters(tag=tag).getchildren()[0].getchildren()
			except FlickrError, e: