def fancy_stats_deferred(exid, start_dt, end_dt, cursor, uid, i):
    key_name = ExerciseStatisticShard.make_key(exid, start_dt, end_dt, cursor)
    if cursor and ExerciseStatisticShard.get_by_key_name(key_name):
        # We've already run, die.
        return

    query = ProblemLog.all()
    query.filter('exercise =', exid)
    query.filter('correct =', True)
    query.filter('time_done >=', start_dt)
    query.filter('time_done <', end_dt)
    query.order('-time_done')
    if cursor:
        query.with_cursor(cursor)

    problem_logs = query.fetch(1000)
    if len(problem_logs) > 0:
        logging.info("processing %d logs for %s" % (len(problem_logs), exid))

        stats = fancy_stats_from_logs(problem_logs)
        pickled = pickle_util.dump(stats)

        shard = ExerciseStatisticShard(
            key_name=key_name,
            exid=exid,
            start_dt=start_dt,
            end_dt=end_dt,
            cursor=cursor,
            blob_val=pickled)
        shard.put()

        enqueue_task(exid, start_dt, end_dt, query.cursor(), uid, i + 1)
    else:
        # No more problem logs left to process
        logging.info("Summing all stats for %s", exid)

        all_stats = fancy_stats_shard_reducer(exid, start_dt, end_dt)

        model = ExerciseStatistic(
            key_name=ExerciseStatistic.make_key(exid, start_dt, end_dt),
            exid=exid,
            start_dt=start_dt,
            end_dt=end_dt,
            blob_val=pickle_util.dump(all_stats),
            log_count=all_stats['log_count'])
        model.put()

        logging.info("done processing %d logs for %s",
                     all_stats['log_count'], exid)

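# Hedged usage sketch (not part of the original snippet): fancy_stats_deferred
# re-enqueues itself through enqueue_task, which is assumed here to wrap
# deferred.defer. A fresh pass over a ProblemLog date range would start with an
# empty cursor and shard index 0; `start_fancy_stats` is a hypothetical name.
from google.appengine.ext import deferred

def start_fancy_stats(exid, start_dt, end_dt, uid):
    # cursor=None and i=0 start the first shard of the chain.
    deferred.defer(fancy_stats_deferred, exid, start_dt, end_dt, None, uid, 0)
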
def test_new_class_to_old_class(self):
    class NewOldClass(object):
        """New-style class."""
        def __init__(self, x, y):
            self.x = x
            self.y = y

    # A trick so we can pickle this class even though it's nested.
    setattr(sys.modules[__name__], 'NewOldClass', NewOldClass)

    pickled_new = pickle_util.dump(NewOldClass(5, 11))

    # Redefine NewOldClass to be old-style
    del NewOldClass

    class NewOldClass:
        """Old-style class."""
        def __init__(self, x, y):
            self.x = x
            self.y = y

    setattr(sys.modules[__name__], 'NewOldClass', NewOldClass)

    # Make sure the unpickling uses pickle, not cPickle
    old_cpickle = pickle_util.cPickle
    old_unpickler_class = pickle_util.g_unpickler_class
    try:
        pickle_util.cPickle = pickle_util.pickle
        pickle_util.g_unpickler_class = (
            pickle_util.RenamedClassUnpicklerForPicklePy)
        foo = pickle_util.load(pickled_new)
        self.assertEqual(5, foo.x)
        self.assertEqual(11, foo.y)
        self.assertEqual('Old-style class.', foo.__doc__)
    finally:
        pickle_util.cPickle = old_cpickle
        pickle_util.g_unpickler_class = old_unpickler_class

def _set_css_deferred(user_data_key, video_key, status, version):
    user_data = user_models.UserData.get(user_data_key)
    uvc = UserVideoCss.get_for_user_data(user_data)
    css = pickle_util.load(uvc.pickled_dict)

    id = '.v%d' % video_key.id()
    if status == UserVideoCss.STARTED:
        if id in css['completed']:
            logging.warn("video [%s] for [%s] went from completed->started. ignoring." % (video_key, user_data_key))
        else:
            css['started'].add(id)
    else:
        css['started'].discard(id)
        css['completed'].add(id)

    uvc.pickled_dict = pickle_util.dump(css)
    uvc.load_pickled()

    # if set_css_deferred runs out of order then we bump the version number
    # to break the cache
    if version < uvc.version:
        version = uvc.version + 1
        user_data.uservideocss_version += 1
        db.put(user_data)

    uvc.version = version
    db.put(uvc)

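# Hedged caller sketch (assumed, not shown above): _set_css_deferred is written
# to run on the task queue, so a caller would enqueue it with deferred.defer,
# passing datastore keys rather than entities. `mark_video_started` and the
# exact version bookkeeping are assumptions for illustration.
from google.appengine.ext import deferred

def mark_video_started(user_data, video):
    # Pass the next expected version; the deferred task bumps it further if it
    # detects it ran out of order.
    deferred.defer(_set_css_deferred, user_data.key(), video.key(),
                   UserVideoCss.STARTED, user_data.uservideocss_version + 1)
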
def get_for_user_data(user_data):
    p = pickle_util.dump({'started': set([]), 'completed': set([])})
    return UserVideoCss.get_or_insert(UserVideoCss._key_for(user_data),
                                      user=user_data.user,
                                      video_css='',
                                      pickled_dict=p,
                                      )

def set_multi(mapping, time=DEFAULT_LAYER_CACHE_EXPIRATION_SECONDS,
              namespace=""):
    '''Sets multiple KeyValueCache entries at once.

    It mirrors the parameters of memcache.set_multi.
    Note: set_multi is not atomic.
    '''
    namespaced_mapping = dict(
        (KeyValueCache.get_namespaced_key(key, namespace), value)
        for key, value in mapping.iteritems())

    dt = datetime.datetime.now()
    dt_expires = datetime.datetime.max
    if time > 0:
        dt_expires = dt + datetime.timedelta(seconds=time)

    key_values = []
    for namespaced_key, value in namespaced_mapping.iteritems():
        # check to see if we need to pickle the results
        pickled = False
        if not isinstance(value, str):
            pickled = True
            value = pickle_util.dump(value)

        key_values.append(
            KeyValueCache(key_name=namespaced_key,
                          value=value,
                          created=dt,
                          expires=dt_expires,
                          pickled=pickled))

    db.put(key_values)

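# Hedged usage sketch (keys and values are hypothetical): values that are not
# already str instances are pickled with pickle_util.dump before storage, so a
# single call can mix raw strings and arbitrary objects.
set_multi({'layer_cache:topic_ids': ['algebra', 'geometry'],
           'layer_cache:raw_bytes': 'already-a-string'},
          time=60 * 60,
          namespace='library')
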
def save(experiment, notes, emotions):
    """Save notes and emo list, associating with specified experiment."""
    notes = _GAEBingoExperimentNotes(
        key_name=_GAEBingoExperimentNotes.key_for_experiment(experiment),
        parent=experiment,
        notes=notes,
        pickled_emotions=pickle_util.dump(emotions))
    notes.put()

def test_rewritten_class_instance(self):
    global OldClass
    # Mock out the rename-map.
    pickle_util._CLASS_RENAME_MAP = {
        ('pickle_util_test', 'OldClass'):
            ('mod.submod1.submod2', 'NewClass')
    }
    pickled = pickle_util.dump(OldClass())
    # Just to make this more fun, delete OldClass
    del OldClass
    actual = pickle_util.load(pickled)
    import mod.submod1.submod2
    self.assertTrue(isinstance(actual, mod.submod1.submod2.NewClass))

def setUp(self):
    super(LayoutTest, self).setUp(db_consistency_probability=1)

    self.testbed = testbed.Testbed()
    self.testbed.activate()
    self.testbed.init_taskqueue_stub()
    self.taskqueue_stub = self.testbed.get_stub(
        testbed.TASKQUEUE_SERVICE_NAME)

    json_data = open('testutil/topictree.json')
    data = json.load(json_data)

    version = topic_models.TopicVersion.create_new_version()
    version.default = True
    version.put()
    version = topic_models.TopicVersion.create_edit_version()

    v1_utils.topictree_import_task("edit", "root", False,
                                   zlib.compress(pickle_util.dump(data)))

def set(key, value, time=None, namespace="", cache_class=memcache,
        compress=True):
    '''This function will pickle and perhaps compress value, before then
    breaking it up into 1MB chunks and storing it with set_multi to whatever
    class cache_class is set to (memcache or KeyValueCache).
    '''
    result = pickle_util.dump(value)
    if compress:
        result = zlib.compress(result)

    size = len(result)
    if size > MAX_SIZE:
        logging.warning("Not caching %s: %i is greater than maxsize %i" %
                        (key, size, MAX_SIZE))
        return

    # if now that we have compressed the item it can fit within a single
    # 1MB object don't use the chunk_list, and it will save us from having
    # to do an extra round-trip on the gets
    if size < MAX_SIZE_OF_CACHE_CHUNKS:
        return cache_class.set(key,
                               ChunkedResult(data=result, compress=compress),
                               time=time,
                               namespace=namespace)

    mapping = {}
    chunk_list = []
    generation = os.urandom(CHUNK_GENERATION_LENGTH)
    for i, pos in enumerate(range(0, size, MAX_SIZE_OF_CACHE_CHUNKS)):
        chunk = generation + result[pos:pos + MAX_SIZE_OF_CACHE_CHUNKS]
        chunk_key = key + "__chunk%i__" % i
        mapping[chunk_key] = chunk
        chunk_list.append(chunk_key)

    mapping[key] = ChunkedResult(chunk_list=chunk_list,
                                 generation=generation,
                                 compress=compress)

    # Note: set_multi is not atomic so when we get we will need to make sure
    # that all the keys are there and are part of the same set_multi
    # operation
    return cache_class.set_multi(mapping, time=time, namespace=namespace)

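# Hedged illustration (key layout inferred from the code above; the key name is
# hypothetical): a value whose compressed pickle exceeds
# MAX_SIZE_OF_CACHE_CHUNKS is split across sibling keys, and the parent key
# holds a ChunkedResult recording the chunk order and the random generation tag.
#
#   set("layer_cache:topic_pages", big_dict)
#     -> "layer_cache:topic_pages"            ChunkedResult(chunk_list=[...])
#     -> "layer_cache:topic_pages__chunk0__"  generation + first 1MB of bytes
#     -> "layer_cache:topic_pages__chunk1__"  generation + next 1MB of bytes
#
# On the read path the caller must verify that every chunk key is present and
# carries the same generation prefix before reassembling, because set_multi is
# not atomic.
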
def create_experiment_and_alternatives(experiment_name,
                                       canonical_name,
                                       alternative_params=None,
                                       conversion_name=None,
                                       conversion_type=ConversionTypes.Binary,
                                       family_name=None):
    if not experiment_name:
        raise Exception("gae_bingo experiments must be named.")

    conversion_name = conversion_name or experiment_name

    if not alternative_params:
        # Default to simple True/False testing
        alternative_params = [True, False]

    experiment = _GAEBingoExperiment(
        key_name=_GAEBingoExperiment.key_for_name(experiment_name),
        name=experiment_name,
        canonical_name=canonical_name,
        family_name=family_name,
        conversion_name=conversion_name,
        conversion_type=conversion_type,
        live=True,
    )

    alternatives = []

    is_dict = type(alternative_params) == dict
    for i, content in enumerate(alternative_params):
        alternatives.append(
            _GAEBingoAlternative(
                key_name=_GAEBingoAlternative.key_for_experiment_name_and_number(
                    experiment_name, i),
                parent=experiment,
                experiment_name=experiment.name,
                number=i,
                pickled_content=pickle_util.dump(content),
                live=True,
                weight=alternative_params[content] if is_dict else 1,
            )
        )

    return experiment, alternatives

def persist_gae_bingo_identity_records(list_identities):
    dict_identity_caches = memcache.get_multi([
        BingoIdentityCache.key_for_identity(ident)
        for ident in list_identities
    ])

    for ident in list_identities:
        identity_cache = dict_identity_caches.get(
            BingoIdentityCache.key_for_identity(ident))

        if identity_cache:
            bingo_identity = _GAEBingoIdentityRecord(
                key_name=_GAEBingoIdentityRecord.key_for_identity(ident),
                identity=ident,
                pickled=pickle_util.dump(identity_cache),
            )
            bingo_identity.put()

def topic_update_from_live(self, edit_version):
    layout.update_from_live(edit_version)

    try:
        response = urlfetch.fetch(
            url="http://www.khanacademy.org/api/v1/topictree",
            deadline=25)
        topictree = json.loads(response.content)

        logging.info("calling /_ah/queue/deferred_import")

        # importing the full topic tree can be too large, so pickle and
        # compress the payload before deferring
        deferred.defer(api.v1_utils.topictree_import_task, "edit", "root",
                       True,
                       zlib.compress(pickle_util.dump(topictree)),
                       _queue="import-queue",
                       _url="/_ah/queue/deferred_import")
    except urlfetch.Error, e:
        logging.exception("Failed to fetch content from khanacademy.org")

def create_experiment_and_alternatives(experiment_name,
                                       canonical_name,
                                       alternative_params=None,
                                       conversion_name=None,
                                       conversion_type=ConversionTypes.Binary,
                                       family_name=None):
    if not experiment_name:
        raise Exception("gae_bingo experiments must be named.")

    conversion_name = conversion_name or experiment_name

    if not alternative_params:
        # Default to simple True/False testing
        alternative_params = [True, False]

    # Generate a random key name for this experiment so it doesn't collide
    # with any past experiments of the same name. All other entities, such as
    # alternatives, snapshots, and notes, will then use this entity as their
    # parent.
    experiment = _GAEBingoExperiment(
        key_name="%s:%s" % (experiment_name, os.urandom(8).encode("hex")),
        name=experiment_name,
        canonical_name=canonical_name,
        family_name=family_name,
        conversion_name=conversion_name,
        conversion_type=conversion_type,
        live=True,
    )

    alternatives = []

    is_dict = type(alternative_params) == dict
    for i, content in enumerate(alternative_params):
        alternatives.append(
            _GAEBingoAlternative(
                key_name=_GAEBingoAlternative.key_for_experiment_name_and_number(
                    experiment_name, i),
                parent=experiment,
                experiment_name=experiment.name,
                number=i,
                pickled_content=pickle_util.dump(content),
                live=True,
                weight=alternative_params[content] if is_dict else 1,
            )
        )

    return experiment, alternatives

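# Hedged usage sketch (experiment names and the final db.put are assumptions;
# the function above only constructs the entities, it does not persist them):
experiment, alternatives = create_experiment_and_alternatives(
    "homepage layout",
    "homepage_layout",
    alternative_params={"old": 1, "new": 1},   # dict form supplies weights
    conversion_name="signed_up")
db.put([experiment] + alternatives)
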
def topic_update_from_live(self, edit_version):
    layout.update_from_live(edit_version)

    try:
        if App.is_dev_server:
            topictree_url = "http://khan-testing.appspot.com/topictree_test.json"
        else:
            topictree_url = url_util.absolute_url("/topictree.json")

        response = urlfetch.fetch(url=topictree_url, deadline=25)
        topictree = json.loads(response.content)

        logging.info("calling /_ah/queue/deferred_import")

        # importing the full topic tree can be too large, so pickle and
        # compress the payload before deferring
        deferred.defer(api.v1_utils.topictree_import_task, "edit", "root",
                       True,
                       zlib.compress(pickle_util.dump(topictree)),
                       _queue="import-queue",
                       _url="/_ah/queue/deferred_import")
    except urlfetch.Error, e:
        logging.exception("Failed to fetch content from khanacademy.org")

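# Hedged sketch of the receiving side (assumed; the real topictree_import_task
# lives in api/v1_utils.py and its body is not shown here). The deferred
# payload is a zlib-compressed pickle, so the task would reverse both steps
# before walking the tree. Parameter names mirror the call sites above but are
# guesses.
def topictree_import_task_sketch(version_id, parent_id, publish,
                                 compressed_tree):
    topictree = pickle_util.load(zlib.decompress(compressed_tree))
    # ... walk `topictree` and create/update topic entities ...
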
def compress(value):
    """Compress value so it'll fit in a single memcache value."""
    pickled = pickle_util.dump(value)
    return zlib.compress(pickled)

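# Hedged counterpart sketch (the name is hypothetical; a matching helper is not
# shown above): reversing compress() is just decompress-then-unpickle.
def decompress(compressed_value):
    pickled = zlib.decompress(compressed_value)
    return pickle_util.load(pickled)
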
def test_simple_class(self):
    """Test pickling and unpickling a class and class instance."""
    expected = (OldClass, OldClass())
    actual = pickle_util.load(pickle_util.dump(expected))
    self.assertEqual(expected[0], actual[0])
    self.assertEqual(type(expected[1]), type(actual[1]))

def test_simple(self):
    expected = 'i am a simple type'
    actual = pickle_util.load(pickle_util.dump(expected))
    self.assertEqual(expected, actual)

def validate(self, value):
    """Validate that value is pickle-able (raise an exception if not)."""
    pickled_value = pickle_util.dump(value)
    _ = super(ObjectProperty, self).validate(pickled_value)
    return value

def test_using_cpickle_to_unpickle(self):
    expected = 'This is a test string'
    actual = cPickle.loads(pickle_util.dump(expected))
    self.assertEqual(expected, actual)

def pickled(*args, **kwargs):
    return pickle_util.dump(func(*args, **kwargs))

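# Hedged sketch (the enclosing decorator is not part of the snippet above, and
# `returns_pickled` is a hypothetical name): `pickled` reads like the inner
# wrapper of a decorator that pickles whatever the wrapped function returns.
def returns_pickled(func):
    def pickled(*args, **kwargs):
        return pickle_util.dump(func(*args, **kwargs))
    return pickled
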
def set_short_circuit_content(self, value):
    self.short_circuit_pickled_content = pickle_util.dump(value)

def store(self):
    # Store compressed results so we stay under the memcache 1MB limit
    pickled = pickle_util.dump(self)
    compressed_pickled = zlib.compress(pickled)
    return memcache.set(RequestStats.memcache_key(self.request_id),
                        compressed_pickled)

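# Hedged counterpart sketch (the fetch side is assumed, not shown above):
# reading a stored RequestStats back reverses the compress-then-set sequence.
def fetch_stats(request_id):
    compressed_pickled = memcache.get(RequestStats.memcache_key(request_id))
    if compressed_pickled is None:
        return None
    return pickle_util.load(zlib.decompress(compressed_pickled))
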
def get_value_for_datastore(self, model_instance):
    result = (super(ObjectProperty, self)
              .get_value_for_datastore(model_instance))
    result = pickle_util.dump(result)
    return db.Blob(result)

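# Hedged counterpart sketch (assumed, not shown above): a db.Property that
# pickles values on the way into the datastore typically unpickles them in
# make_value_from_datastore so model code always sees the original object.
def make_value_from_datastore(self, value):
    if value is None:
        return None
    return pickle_util.load(str(value))
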