Example #1
def audit_segments(halt_event, work_dir):
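    """
    classify each pre-audit segment yielded by generate_work(work_dir);
    segments needing repair are pickled to the data or meta repair file
    """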
    log = logging.getLogger("audit_segments")

    if not os.path.exists(anti_entropy_dir):
        log.info("creating {0}".format(anti_entropy_dir))
        os.mkdir(anti_entropy_dir)

    meta_repair_file_path = compute_meta_repair_file_path()
    meta_repair_file = \
            gzip.GzipFile(filename=meta_repair_file_path, mode="wb")

    data_repair_file_path = compute_data_repair_file_path()
    data_repair_file = \
            gzip.GzipFile(filename=data_repair_file_path, mode="wb")

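    # per-category counters; the keys other than "total" are module-level
    # anti-entropy status constants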
    counts = {
        "total": 0,
        anti_entropy_missing_replicas: 0,
        anti_entropy_incomplete_finalization: 0,
        anti_entropy_damaged_records: 0,
        anti_entropy_missing_tombstones: 0,
        anti_entropy_database_inconsistancy: 0,
    }

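    # cutoff timestamp derived from _min_segment_age; it is passed to the
    # per-segment checks below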
    current_time = create_timestamp()
    min_segment_age = parse_timedelta_str(_min_segment_age)
    newest_allowable_timestamp = current_time - min_segment_age
    log.info("newest allowable timestamp = {0}".format(
        newest_allowable_timestamp.isoformat()))

    for row_key, segment_status, segment_data in generate_work(work_dir):
        if halt_event.is_set():
            log.info("halt_event is set: exiting")
            return

        assert segment_status == anti_entropy_pre_audit

        counts["total"] += 1

        # missing replicas needs to run first, because the other tests
        # assume there are no missing replicas
        if _missing_replicas(segment_data, newest_allowable_timestamp):
            log.debug("missing_replicas {0}".format(row_key))
            counts[anti_entropy_missing_replicas] += 1
            store_sized_pickle((
                row_key,
                anti_entropy_missing_replicas,
                segment_data,
            ), data_repair_file)
            continue

        # _missing_tombstones needs to run ahead of _incomplete_finalization
        if _missing_tombstones(segment_data, newest_allowable_timestamp):
            log.debug("missing_tombstones {0}".format(row_key))
            counts[anti_entropy_missing_tombstones] += 1
            store_sized_pickle((
                row_key,
                anti_entropy_missing_tombstones,
                segment_data,
            ), meta_repair_file)
            continue

        if _incomplete_finalization(segment_data, newest_allowable_timestamp):
            log.debug("incomplete_finalization {0}".format(row_key))
            counts[anti_entropy_incomplete_finalization] += 1
            store_sized_pickle((
                row_key,
                anti_entropy_incomplete_finalization,
                segment_data,
            ), data_repair_file)
            continue

        if _damaged_records(segment_data):
            log.debug("damaged_records {0}".format(row_key))
            counts[anti_entropy_damaged_records] += 1
            store_sized_pickle((
                row_key,
                anti_entropy_damaged_records,
                segment_data,
            ), data_repair_file)
            continue

        if _database_inconsistancy(row_key, segment_data):
            log.debug("database_inconsistancy {0}".format(row_key))
            counts[anti_entropy_database_inconsistancy] += 1
            store_sized_pickle((
                row_key,
                anti_entropy_database_inconsistancy,
                segment_data,
            ), data_repair_file)
            continue

    meta_repair_file.close()
    data_repair_file.close()

    keys = [
        "total", anti_entropy_missing_replicas,
        anti_entropy_incomplete_finalization, anti_entropy_damaged_records,
        anti_entropy_missing_tombstones, anti_entropy_database_inconsistancy
    ]

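    # log one summary line per category (counts use thousands separators)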
    for key in keys:
        log.info("{0} {1:,}".format(key, counts[key]))
Example #2
def audit_segments(halt_event, work_dir):
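    """
    classify each pre-audit segment yielded by generate_work(work_dir);
    segments needing repair are pickled to the data or meta repair file
    """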
    log = logging.getLogger("audit_segments")

    if not os.path.exists(anti_entropy_dir):
        log.info("creating {0}".format(anti_entropy_dir))
        os.mkdir(anti_entropy_dir)

    meta_repair_file_path = compute_meta_repair_file_path()
    meta_repair_file = \
            gzip.GzipFile(filename=meta_repair_file_path, mode="wb")

    data_repair_file_path = compute_data_repair_file_path()
    data_repair_file = \
            gzip.GzipFile(filename=data_repair_file_path, mode="wb")

    counts = {
        "total"                                 : 0,
        anti_entropy_missing_replicas           : 0,
        anti_entropy_incomplete_finalization    : 0,
        anti_entropy_damaged_records            : 0,
        anti_entropy_missing_tombstones         : 0,
        anti_entropy_database_inconsistancy     : 0,
    }

    current_time = create_timestamp()
    min_segment_age = parse_timedelta_str(_min_segment_age) 
    newest_allowable_timestamp = current_time - min_segment_age
    log.info("newest allowable timestamp = {0}".format(
        newest_allowable_timestamp.isoformat()))

    for row_key, segment_status, segment_data in generate_work(work_dir):
        if halt_event.is_set():
            log.info("halt_event is set: exiting")
            return

        assert segment_status == anti_entropy_pre_audit

        counts["total"] += 1
        
        # missing replicas needs to run first, because the other tests
        # assume there are no missing replicas
        if _missing_replicas(segment_data, newest_allowable_timestamp):
            log.debug("missing_replicas {0}".format(row_key))
            counts[anti_entropy_missing_replicas] += 1
            store_sized_pickle(
                (row_key, anti_entropy_missing_replicas, segment_data, ), 
                data_repair_file)
            continue
        
        # _missing_tombstones needs to run ahead of _incomplete_finalization
        if _missing_tombstones(segment_data, newest_allowable_timestamp):
            log.debug("missing_tombstones {0}".format(row_key))
            counts[anti_entropy_missing_tombstones] += 1
            store_sized_pickle(
                (row_key, anti_entropy_missing_tombstones, segment_data, ), 
                meta_repair_file)
            continue

        if _incomplete_finalization(segment_data, newest_allowable_timestamp):
            log.debug("incomplete_finalization {0}".format(row_key))
            counts[anti_entropy_incomplete_finalization] += 1
            store_sized_pickle(
                (row_key, anti_entropy_incomplete_finalization, segment_data,), 
                data_repair_file)
            continue

        if _damaged_records(segment_data):
            log.debug("damaged_records {0}".format(row_key))
            counts[anti_entropy_damaged_records] += 1
            store_sized_pickle(
                (row_key, anti_entropy_damaged_records, segment_data,), 
                data_repair_file)
            continue

        if _database_inconsistancy(row_key, segment_data):
            log.debug("database_inconsistancy {0}".format(row_key))
            counts[anti_entropy_database_inconsistancy] += 1
            store_sized_pickle(
                (row_key, anti_entropy_database_inconsistancy, segment_data,),
                data_repair_file)
            continue

    meta_repair_file.close()
    data_repair_file.close()

    keys = ["total",
            anti_entropy_missing_replicas,
            anti_entropy_incomplete_finalization,
            anti_entropy_damaged_records,
            anti_entropy_missing_tombstones,
            anti_entropy_database_inconsistancy]

    for key in keys:
        log.info("{0} {1:,}".format(key, counts[key]))
Example #3
def main():
    """
    main entry point
    return 0 for success (exit code)
    """
    global _max_value_file_time

    initialize_logging(_log_path)
    log = logging.getLogger("main")

    try:
        _max_value_file_time = parse_timedelta_str(_max_value_file_time_str)
    except Exception as instance:
        log.exception("Unable to parse '{0}' {1}".format(
            _max_value_file_time_str, instance))
        return -1

    log.info("program starts; max_value_file_time = {0}".format(
        _max_value_file_time))

    zmq_context = zmq.Context()

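    # announce program start on the event push channel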
    event_push_client = EventPushClient(zmq_context, "node_inspector")
    event_push_client.info("program-start", "node_inspector starts")  

    try:
        connection = get_node_local_connection()
    except Exception as instance:
        log.exception("Exception connecting to database {0}".format(instance))
        event_push_client.exception(
            unhandled_exception_topic,
            str(instance),
            exctype=instance.__class__.__name__
        )
        return -1

    known_value_files = dict()

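    # process all work batches in a single transaction: roll back and report
    # on any exception, otherwise commit; always close resources on the way out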
    batch = None  # defined up front so the except clause below can log it
    connection.begin_transaction()
    try:
        for batch in generate_work(connection):
            _process_work_batch(connection, known_value_files, batch)
    except Exception as instance:
        connection.rollback()
        log.exception("Exception processing batch {0} {1}".format(
            batch, instance))
        event_push_client.exception(
            unhandled_exception_topic,
            str(instance),
            exctype=instance.__class__.__name__
        )
        return -1
    else:
        connection.commit()
    finally:
        connection.close()
        event_push_client.close()
        zmq_context.term()

    log.info("program terminates normally")
    return 0