def write(items):
    """Serialize each item in *items* to JSON, honouring the batch window.

    Batching example table:
    b_start = 0, b_size = 1000, counter = 1000: writes
    b_start = 1000, b_size = 1000, counter = 1000: breaks
    b_start = 1000, b_size = 1000, counter = 1001: writes

    Relies on module globals: COUNTER, BATCH_START, BATCH_SIZE,
    BATCH_PREVIOUS_PATH, Wrapper, logger, _clean_dict.
    """
    global COUNTER
    global BATCH_PREVIOUS_PATH
    for item in items:
        # Stop once the current batch window
        # [BATCH_START, BATCH_START + BATCH_SIZE) is exhausted.
        if BATCH_START is not None \
                and BATCH_SIZE is not None \
                and COUNTER >= BATCH_START + BATCH_SIZE:
            # BATCH UNTIL
            break
        ppath = '/'.join(item.getPhysicalPath())
        if BATCH_PREVIOUS_PATH is not None \
                and BATCH_START is not None:
            # MEMORY SAVING BATCHING
            if BATCH_PREVIOUS_PATH == ppath:
                # BATCH_PREVIOUS_PATH is the path of the last item, which was
                # successfully exported in a previous batch. BATCH_START is
                # the counting state from where the new batch begins. We set
                # COUNTER to this state here:
                COUNTER = BATCH_START
                # Reset BATCH_PREVIOUS_PATH, so we don't visit this
                # conditional branch again.
                BATCH_PREVIOUS_PATH = None
            # Always continue in this conditional branch: while seeking the
            # resume point, every item is skipped.
            continue
        json_structure = None
        try:
            context_dict = Wrapper(item)
        except Exception as e:
            # tb = pprint.pformat(traceback.format_tb(sys.exc_info()[2]))
            # msg = 'ERROR: exception wrapping object: %s\n%s' % (str(e), tb)
            logger.warn('exception wrapping object %s. Error: %s' % (ppath, e))
            continue
        # Repeatedly strip non-serializable members until json.dumps succeeds.
        passed = False
        skip_item = False
        while not passed:
            try:
                # see, if we can serialize to json
                json_structure = json.dumps(context_dict)  # noqa
                passed = True
            except Exception as error:
                if "serializable" in str(error):
                    # Good place to inspect errors:
                    # from ipdb import set_trace; set_trace()
                    key, context_dict = _clean_dict(context_dict, error)
                    logger.warn(
                        'Not serializable member %s of %s ignored. (%s)' % (
                            key, repr(item), ppath))
                    passed = False
                else:
                    logger.warn(
                        'ERROR: Unknown error serializing object %s: %s' % (
                            ppath, error))
                    # BUGFIX: the original 'continue' here bound to the
                    # while-loop and retried the same failing dump forever
                    # (infinite loop). Break out and skip this item instead.
                    skip_item = True
                    break
        if skip_item:
            continue
def write(items):
    global COUNTER
    global BATCH_PREVIOUS_PATH
    """
    Batching example table:
    b_start = 0, b_size = 1000, counter = 1000: writes
    b_start = 1000, b_size = 1000, counter = 1000: breaks
    b_start = 1000, b_size = 1000, counter = 1001: writes
    """
    for obj in items:
        # Once COUNTER reaches the end of the batch window, we are done.
        if BATCH_START is not None \
                and BATCH_SIZE is not None \
                and COUNTER >= BATCH_START + BATCH_SIZE:
            # BATCH UNTIL
            break
        path = '/'.join(obj.getPhysicalPath())
        if BATCH_PREVIOUS_PATH is not None \
                and BATCH_START is not None:
            # MEMORY SAVING BATCHING: fast-forward until we re-reach the
            # last item exported by the previous batch.
            if path == BATCH_PREVIOUS_PATH:
                # Resume the counting state where the previous batch ended,
                # and clear the marker so this branch is never entered again.
                COUNTER = BATCH_START
                BATCH_PREVIOUS_PATH = None
            # Every item seen while fast-forwarding is skipped.
            continue
        serialized = None
        try:
            wrapped = Wrapper(obj)
        except Exception as exc:
            # tb = pprint.pformat(traceback.format_tb(sys.exc_info()[2]))
            # msg = 'ERROR: exception wrapping object: %s\n%s' % (str(e), tb)
            logger.warn('exception wrapping object %s. Error: %s' % (path, exc))
            continue
        done = False
        while not done:
            try:
                # Probe whether the wrapped object is JSON-serializable.
                serialized = json.dumps(wrapped)  # noqa
                done = True
            except Exception as err:
                if "serializable" not in str(err):
                    logger.warn(
                        'ERROR: Unknown error serializing object %s: %s' % (
                            path, err))
                    continue
                # Good place to inspect errors:
                # from ipdb import set_trace; set_trace()
                # Drop the offending member and retry the dump.
                member, wrapped = _clean_dict(wrapped, err)
                logger.warn(
                    'Not serializable member %s of %s ignored. (%s)' % (
                        member, repr(obj), path))
                done = False