Example #1
import gzip
import logging
import sys

# helpers such as compute_data_repair_file_path, retrieve_sized_pickle,
# compute_expected_slice_count, _compute_part_label,
# _get_sequence_from_data_reader, and store_sized_pickle are defined
# elsewhere in the surrounding module
def _process_repair_entries(index, source_node_name, req_socket):
    log = logging.getLogger("_process_repair_entries")

    repair_file_path = compute_data_repair_file_path()
    log.debug("opening {0}".format(repair_file_path))
    repair_file = gzip.GzipFile(filename=repair_file_path, mode="rb")

    record_number = 0
    while True:
        try:
            row_key, segment_status, segment_data = \
                    retrieve_sized_pickle(repair_file)
        except EOFError:
            log.debug("EOF at record number {0}".format(record_number))
            repair_file.close()
            return record_number

        # collect the damaged sequence numbers reported in every node's
        # row, then select this node's own row by index
        damaged_sequence_numbers = list()
        for segment_row in segment_data:
            damaged_sequence_numbers.extend(
                segment_row["damaged_sequence_numbers"])

        segment_row = segment_data[index]

        record_number += 1
        result = {"record_number"       : record_number,
                  "action"              : None,	 
                  "part"                : None,	 
                  "zfec_padding_size"   : None,
                  "source_node_name"    : source_node_name,
                  "segment_num"         : segment_row["segment_num"],
                  "result"              : None,
                  "data"                : None,}

        expected_slice_count = \
            compute_expected_slice_count(segment_row["file_size"])

        for sequence_num in range(0, expected_slice_count):
            result["data"] = None
            if sequence_num in damaged_sequence_numbers:
                log.debug("{0} damaged sequence {1}".format(row_key,
                                                            sequence_num))
                result["action"] = "read"
                result["part"] = _compute_part_label(sequence_num, 
                                                     expected_slice_count)
                try:
                    result["zfec_padding_size"], data = \
                            _get_sequence_from_data_reader(req_socket, 
                                                           segment_row, 
                                                           sequence_num)
                except Exception as instance:
                    log.exception("record #{0} sequence {1} {2}".format(
                        record_number, sequence_num, instance))
                    result["result"] = "error"
                else:
                    result["result"] = "success"
                    result["data"] = data
            else:
                # this sequence is undamaged; emit a skip marker
                result["action"] = "skip"
                result["result"] = None

            unified_id, conjoined_part = row_key
            sequence_key = (unified_id, 
                            conjoined_part, 
                            sequence_num, 
                            segment_row["segment_num"])
            log.debug("storing {0} {1}".format(sequence_key,
                                               result["action"]))
            store_sized_pickle((sequence_key, segment_status, result, ), 
                               sys.stdout.buffer)
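
Example #1 streams its output to stdout as "sized pickles" via store_sized_pickle, and reads its input the same way via retrieve_sized_pickle. Those helpers are not part of this excerpt; the following is a minimal sketch, assuming a simple length-prefixed framing (the actual module may frame records differently):

import pickle
import struct

_HEADER_FORMAT = "!Q"
_HEADER_SIZE = struct.calcsize(_HEADER_FORMAT)

def store_sized_pickle(obj, stream):
    # pickle the object and write it behind an 8-byte big-endian length
    data = pickle.dumps(obj)
    stream.write(struct.pack(_HEADER_FORMAT, len(data)))
    stream.write(data)

def retrieve_sized_pickle(stream):
    # read one length-prefixed pickle; raise EOFError when the stream is
    # exhausted, which _process_repair_entries treats as normal
    # termination rather than an error
    header = stream.read(_HEADER_SIZE)
    if len(header) < _HEADER_SIZE:
        raise EOFError("end of sized-pickle stream")
    (size, ) = struct.unpack(_HEADER_FORMAT, header)
    data = stream.read(size)
    if len(data) < size:
        raise EOFError("truncated sized-pickle record")
    return pickle.loads(data)
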
Example #2
import logging

# helpers and constants such as make_batch_key, compute_expected_slice_count,
# _value_file_status, _verify_entry_against_value_file,
# _store_damaged_segment, _always_check_entries, and the _value_file_* /
# damaged_segment_* values are defined elsewhere in the surrounding module
def _process_work_batch(connection, known_value_files, batch):
    log = logging.getLogger("_process_work_batch")

    assert len(batch) > 0
    batch_key = make_batch_key(batch[0])
    log.info("batch {0}".format(batch_key))

    missing_sequence_numbers = list()
    defective_sequence_numbers = list()

    expected_slice_count = compute_expected_slice_count(batch[0].file_size)
    expected_sequence_numbers = set(range(0, expected_slice_count))
    actual_sequence_numbers = {entry.sequence_num for entry in batch}
    missing_sequence_numbers.extend(
            expected_sequence_numbers - actual_sequence_numbers)

    for entry in batch:
        if entry.value_file_id not in known_value_files:
            known_value_files[entry.value_file_id] = \
                    _value_file_status(connection, entry)
        value_file_status = known_value_files[entry.value_file_id]

        # if we don't have a value_file for any sequence, 
        # treat that as missing too
        if value_file_status == _value_file_missing:
            log.info("Missing value file {0} for {1} sequence {2}".format(
                entry.value_file_id, batch_key, entry.sequence_num))
            missing_sequence_numbers.append(entry.sequence_num)
            continue

        if not _always_check_entries:
            if value_file_status == _value_file_valid:
                continue

            # if neither of the above branches applied, then every record
            # in the database that points to this value file must be
            # verified by opening, seeking, reading, and hashing the
            # record pointed to in the value file (a hedged sketch of
            # that check follows this function). This will be terribly
            # costly in terms of IO, because our work is not sorted by
            # value file. Fortunately, data corruption should be rare
            # enough that the inefficiency will be irrelevant.
            assert value_file_status == _value_file_questionable

        if not _verify_entry_against_value_file(entry):
            log.info("Defective value file {0} for {1} sequence {2}".format(
                entry.value_file_id, batch_key, entry.sequence_num))
            defective_sequence_numbers.append(entry.sequence_num)
            continue

    if missing_sequence_numbers:
        missing_sequence_numbers.sort()
        log.info("missing sequence numbers {0}".format(
            missing_sequence_numbers))
        _store_damaged_segment(connection,
                               batch[0],
                               damaged_segment_missing_sequence,
                               missing_sequence_numbers)

    if defective_sequence_numbers:
        defective_sequence_numbers.sort()
        log.info("defective sequence numbers {0}".format(
            defective_sequence_numbers))
        _store_damaged_segment(connection,
                               batch[0],
                               damaged_segment_defective_sequence,
                               defective_sequence_numbers)
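
The comment in Example #2 explains what verifying an entry against a questionable value file involves: open the value file, seek to the record, read its bytes, and hash them. _verify_entry_against_value_file itself is not shown in the excerpt, so the sketch below is hypothetical; the attribute names (value_file_path, value_file_offset, data_size, data_hash) and the MD5 digest are assumptions, not the actual implementation:

import hashlib

def _verify_entry_against_value_file(entry):
    # hypothetical sketch: seek to the entry's offset in its value file,
    # read the stored bytes, and compare their digest against the hash
    # recorded in the database; all attribute names are assumptions
    with open(entry.value_file_path, "rb") as value_file:
        value_file.seek(entry.value_file_offset)
        data = value_file.read(entry.data_size)
    if len(data) != entry.data_size:
        # a short read means the value file is truncated
        return False
    return hashlib.md5(data).digest() == bytes(entry.data_hash)

Because batches are grouped by segment rather than by value file, these seeks scatter across many files; that is the IO cost the comment warns about.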