def audit_segments(halt_event, work_dir):
    """
    Audit the pre-audit segment rows found in work_dir, classify each
    damaged segment, and append repair instructions to gzipped meta and
    data repair files. Log a summary count for each category at the end.
    """
    log = logging.getLogger("audit_segments")

    if not os.path.exists(anti_entropy_dir):
        log.info("creating {0}".format(anti_entropy_dir))
        os.mkdir(anti_entropy_dir)

    meta_repair_file_path = compute_meta_repair_file_path()
    meta_repair_file = \
        gzip.GzipFile(filename=meta_repair_file_path, mode="wb")

    data_repair_file_path = compute_data_repair_file_path()
    data_repair_file = \
        gzip.GzipFile(filename=data_repair_file_path, mode="wb")

    counts = {
        "total"                                 : 0,
        anti_entropy_missing_replicas           : 0,
        anti_entropy_incomplete_finalization    : 0,
        anti_entropy_damaged_records            : 0,
        anti_entropy_missing_tombstones         : 0,
        anti_entropy_database_inconsistancy     : 0,
    }

    current_time = create_timestamp()
    min_segment_age = parse_timedelta_str(_min_segment_age)
    newest_allowable_timestamp = current_time - min_segment_age
    log.info("newest allowable timestamp = {0}".format(
        newest_allowable_timestamp.isoformat()))

    for row_key, segment_status, segment_data in generate_work(work_dir):
        if halt_event.is_set():
            log.info("halt_event is set: exiting")
            return

        assert segment_status == anti_entropy_pre_audit

        counts["total"] += 1

        # _missing_replicas must run first: the other tests assume
        # there are no missing replicas
        if _missing_replicas(segment_data, newest_allowable_timestamp):
            log.debug("missing_replicas {0}".format(row_key))
            counts[anti_entropy_missing_replicas] += 1
            store_sized_pickle(
                (row_key, anti_entropy_missing_replicas, segment_data, ),
                data_repair_file)
            continue

        # _missing_tombstones must run ahead of _incomplete_finalization
        if _missing_tombstones(segment_data, newest_allowable_timestamp):
            log.debug("missing_tombstones {0}".format(row_key))
            counts[anti_entropy_missing_tombstones] += 1
            store_sized_pickle(
                (row_key, anti_entropy_missing_tombstones, segment_data, ),
                meta_repair_file)
            continue

        if _incomplete_finalization(segment_data, newest_allowable_timestamp):
            log.debug("incomplete_finalization {0}".format(row_key))
            counts[anti_entropy_incomplete_finalization] += 1
            store_sized_pickle(
                (row_key, anti_entropy_incomplete_finalization, segment_data, ),
                data_repair_file)
            continue

        if _damaged_records(segment_data):
            log.debug("damaged_records {0}".format(row_key))
            counts[anti_entropy_damaged_records] += 1
            store_sized_pickle(
                (row_key, anti_entropy_damaged_records, segment_data, ),
                data_repair_file)
            continue

        if _database_inconsistancy(row_key, segment_data):
            log.debug("database_inconsistancy {0}".format(row_key))
            counts[anti_entropy_database_inconsistancy] += 1
            store_sized_pickle(
                (row_key, anti_entropy_database_inconsistancy, segment_data, ),
                data_repair_file)
            continue

    meta_repair_file.close()
    data_repair_file.close()

    keys = ["total",
            anti_entropy_missing_replicas,
            anti_entropy_incomplete_finalization,
            anti_entropy_damaged_records,
            anti_entropy_missing_tombstones,
            anti_entropy_database_inconsistancy]

    for key in keys:
        log.info("{0} {1:,}".format(key, counts[key]))
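# The audit above streams (row_key, status, segment_data) tuples into the
# gzipped repair files via store_sized_pickle. The project's own helper is
# not shown in this excerpt; the sketch below only illustrates the assumed
# on-disk layout (a length prefix followed by a pickle) so that a repair
# process could read the records back one at a time without loading the
# whole file. The names _store_sized_pickle and _retrieve_sized_pickles are
# hypothetical stand-ins, not the project's API.
import pickle
import struct

def _store_sized_pickle(entry, file_object):
    """pickle entry and write it prefixed with a 4-byte big-endian length"""
    data = pickle.dumps(entry)
    file_object.write(struct.pack("!I", len(data)))
    file_object.write(data)

def _retrieve_sized_pickles(file_object):
    """yield successive pickled entries until the file is exhausted"""
    while True:
        size_bytes = file_object.read(4)
        if len(size_bytes) < 4:
            return
        (size, ) = struct.unpack("!I", size_bytes)
        yield pickle.loads(file_object.read(size))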
def main():
    """
    main entry point
    return 0 for success (exit code)
    """
    global _max_value_file_time

    initialize_logging(_log_path)
    log = logging.getLogger("main")

    try:
        _max_value_file_time = parse_timedelta_str(_max_value_file_time_str)
    except Exception as instance:
        log.exception("Unable to parse '{0}' {1}".format(
            _max_value_file_time_str, instance))
        return -1

    log.info("program starts; max_value_file_time = {0}".format(
        _max_value_file_time))

    zmq_context = zmq.Context()

    event_push_client = EventPushClient(zmq_context, "node_inspector")
    event_push_client.info("program-start", "node_inspector starts")

    try:
        connection = get_node_local_connection()
    except Exception as instance:
        log.exception("Exception connecting to database {0}".format(instance))
        event_push_client.exception(
            unhandled_exception_topic,
            str(instance),
            exctype=instance.__class__.__name__
        )
        return -1

    known_value_files = dict()

    # initialize batch so the except clause can log it even if
    # generate_work raises before the first batch is produced
    batch = None

    connection.begin_transaction()
    try:
        for batch in generate_work(connection):
            _process_work_batch(connection, known_value_files, batch)
    except Exception as instance:
        connection.rollback()
        log.exception("Exception processing batch {0} {1}".format(
            batch, instance))
        event_push_client.exception(
            unhandled_exception_topic,
            str(instance),
            exctype=instance.__class__.__name__
        )
        return -1
    else:
        connection.commit()
    finally:
        connection.close()
        event_push_client.close()
        zmq_context.term()

    log.info("program terminates normally")
    return 0
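# The docstring above says main() returns a process exit code, so the module
# presumably ends with the standard entry-point guard (assumed; not shown in
# this excerpt).
if __name__ == "__main__":
    import sys
    sys.exit(main())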