示例#1
0
def write(items):
    global COUNTER
    global BATCH_PREVIOUS_PATH
    """
    Batching example table:
        b_start = 0, b_size = 1000, counter = 1000: writes
        b_start = 1000, b_size = 1000, counter = 1000: breaks
        b_start = 1000, b_size = 1000, counter = 1001: writes
    """

    for item in items:
        if BATCH_START is not None\
                and BATCH_SIZE is not None\
                and COUNTER >= BATCH_START + BATCH_SIZE:
            # BATCH UNTIL
            break

        ppath = '/'.join(item.getPhysicalPath())

        if BATCH_PREVIOUS_PATH is not None\
                and BATCH_START is not None:
            # MEMORY SAVING BATCHING
            if BATCH_PREVIOUS_PATH == ppath:
                # BATCH_PREVIOUS_PATH is the path of the last item, which was
                # successfully exported in a previous batch. BATCH_START is the
                # counting state from where the new batch begins. We set
                # COUNTER to this state here:
                COUNTER = BATCH_START
                # Reset BATCH_PREVIOUS_PATH, so we don't visit this conditional
                # branch again.
                BATCH_PREVIOUS_PATH = None
            continue  # Always continue in this conditional branch.

        json_structure = None

        try:
            context_dict = Wrapper(item)
        except Exception, e:
            # tb = pprint.pformat(traceback.format_tb(sys.exc_info()[2]))
            # msg = 'ERROR: exception wrapping object: %s\n%s' % (str(e), tb)
            logger.warn('exception wrapping object %s. Error: %s' % (ppath, e))
            continue

        passed = False
        while not passed:
            try:
                # see, if we can serialize to json
                json_structure = json.dumps(context_dict)  # noqa
                passed = True
            except Exception, error:
                if "serializable" in str(error):
                    # Good place to inspect errors:
                    ## from ipdb import set_trace; set_trace()
                    key, context_dict = _clean_dict(context_dict, error)
                    logger.warn(
                        'Not serializable member %s of %s ignored. (%s)' %
                        (key, repr(item), ppath))
                    passed = False
                else:
                    logger.warn(
                        'ERROR: Unknown error serializing object %s: %s' %
                        (ppath, error))
                    continue
示例#2
0
def write(items):
    global COUNTER
    global BATCH_PREVIOUS_PATH
    """
    Batching example table:
        b_start = 0, b_size = 1000, counter = 1000: writes
        b_start = 1000, b_size = 1000, counter = 1000: breaks
        b_start = 1000, b_size = 1000, counter = 1001: writes
    """

    for item in items:
        if BATCH_START is not None\
                and BATCH_SIZE is not None\
                and COUNTER >= BATCH_START + BATCH_SIZE:
            # BATCH UNTIL
            break

        ppath = '/'.join(item.getPhysicalPath())

        if BATCH_PREVIOUS_PATH is not None\
                and BATCH_START is not None:
            # MEMORY SAVING BATCHING
            if BATCH_PREVIOUS_PATH == ppath:
                # BATCH_PREVIOUS_PATH is the path of the last item, which was
                # successfully exported in a previous batch. BATCH_START is the
                # counting state from where the new batch begins. We set
                # COUNTER to this state here:
                COUNTER = BATCH_START
                # Reset BATCH_PREVIOUS_PATH, so we don't visit this conditional
                # branch again.
                BATCH_PREVIOUS_PATH = None
            continue  # Always continue in this conditional branch.

        json_structure = None

        try:
            context_dict = Wrapper(item)
        except Exception, e:
            # tb = pprint.pformat(traceback.format_tb(sys.exc_info()[2]))
            # msg = 'ERROR: exception wrapping object: %s\n%s' % (str(e), tb)
            logger.warn('exception wrapping object %s. Error: %s' % (ppath, e))
            continue

        passed = False
        while not passed:
            try:
                # see, if we can serialize to json
                json_structure = json.dumps(context_dict)  # noqa
                passed = True
            except Exception, error:
                if "serializable" in str(error):
                    # Good place to inspect errors:
                    ## from ipdb import set_trace; set_trace()
                    key, context_dict = _clean_dict(context_dict, error)
                    logger.warn(
                        'Not serializable member %s of %s ignored. (%s)' % (
                            key,
                            repr(item),
                            ppath
                        )
                    )
                    passed = False
                else:
                    logger.warn(
                        'ERROR: Unknown error serializing object %s: %s' % (
                            ppath,
                            error
                        )
                    )
                    continue