def sort_order(indexes, data_set, need_log=False):
    """
    introduction: Restore data in order of index.

    :param indexes: The indexes of data set; indexes[i] is the target row of data_set[i].
                     Type: One-dimensional list(int).

    :param data_set: The disordered data set, the locations of this are corresponding to parameter "indexes".
                      Type: Two-dimensional list(int).

    :param need_log: need output log.

    :returns matrix: Binary list in correct order. Rows that no valid index points at stay zero-filled.
                      Type: Two-dimensional list(int).
    """
    # The progress monitor is only ever used for logging, so build it on demand.
    m = monitor.Monitor() if need_log else None

    if need_log:
        log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                   "Restore data order according to index.")

    total = len(indexes)
    width = len(data_set[0]) if total > 0 else 0
    matrix = [[0] * width for _ in range(total)]

    for row, target in enumerate(indexes):
        if need_log:
            m.output(row, total)

        # Validate the destination row (not the loop counter): an index damaged
        # during decoding may point outside the matrix and must be skipped.
        if 0 <= target < total:
            matrix[target] = data_set[row]

    if m is not None:
        m.restore()

    return matrix
def read_dna_file(path, need_log=False):
    """
    introduction: Reading DNA sequence set from documents.

    :param path: File path.
                  Type: string

    :param need_log: need output log.

    :return dna_sequences: One entry per line of the file, each entry being the list of characters
                           of that line without its line terminator. When the file cannot be read an
                           error is logged and nothing is returned.
                            Type: two-dimensional list(char)
    """
    # The progress monitor is only ever used for logging, so build it on demand.
    m = monitor.Monitor() if need_log else None

    dna_sequences = []
    try:
        with open(path, "r") as file:
            if need_log:
                log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                           "Read DNA sequences from file: " + path)

            # Read current file by line
            lines = file.readlines()
            for index, line in enumerate(lines):
                if need_log:
                    m.output(index, len(lines))

                # Remove only the line terminator; blindly cutting the last character
                # loses a base when the final line has no trailing newline.
                dna_sequences.append(list(line.rstrip("\n")))

        return dna_sequences
    except IOError:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The file selection operation was not performed correctly. Please execute the operation again!")
def encode(self, matrix, size, need_log=False):
    """
    introduction: Turn every row of a binary matrix into one DNA sequence (Simple Codec).

    :param matrix: Binary two-dimensional matrix, each row is converted independently.
                    Type: Two-dimensional list(int).

    :param size: File size; it is stored on the codec and handed back by decode.
                  Type: int

    :param need_log: Show the log.

    :return dna_sequences: One converted sequence per matrix row, in row order.
    """
    self.file_size = size
    self.m.restore()

    if need_log:
        log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                   "Encode the matrix by Simple Codec.")

    total = len(matrix)
    dna_sequences = []
    for position, binary_list in enumerate(matrix):
        if need_log:
            self.m.output(position, total)
        dna_sequences.append(self._list_to_sequence(binary_list))

    return dna_sequences
def decode(self, dna_sequences, need_log=False):
    """
    introduction: Turn every DNA sequence back into one binary row (Simple Codec).

    :param dna_sequences: The DNA sequences, one per matrix row.
                           Type: One-dimensional list(string).

    :param need_log: Show the log.

    :return matrix: One converted binary list per DNA sequence, in input order.
                     Type: Two-dimensional list(int).

    :return file_size: The file size stored on the codec.
                        Type: int
    """
    self.m.restore()

    if need_log:
        log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                   "Convert DNA sequences to binary matrix by Simple Codec.")

    total = len(dna_sequences)
    matrix = []
    for position, dna_sequence in enumerate(dna_sequences):
        if need_log:
            self.m.output(position, total)
        matrix.append(self._sequence_to_list(dna_sequence))

    self.m.restore()
    return matrix, self.file_size
def connect_all(matrix, need_log=False):
    """
    introduction: Integrate index and data for every row of the two-dimensional matrix.

    :param matrix: Data from input.
                    Type: Two-dimensional list(int).

    :param need_log: need output log.

    :return new_matrix: The result of connect(row number, row data, index length) for every row.
                         Type: Two-dimensional list(int).
    """
    m = monitor.Monitor()

    row_count = len(matrix)
    # Number of binary digits needed to write the row count.
    index_binary_length = len(bin(row_count)) - 2

    if need_log:
        log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                   "Add index in the binary matrix.")

    new_matrix = []
    for row, data in enumerate(matrix):
        if need_log:
            m.output(row, row_count)
        new_matrix.append(connect(row, data, index_binary_length))

    m.restore()

    return new_matrix
def add_for_matrix(self, matrix):
    """
    introduction: Add Reed-Solomon error correction for every row of the origin matrix.

    :param matrix: Origin matrix. The data of this matrix contains only 0 or 1 (non-char).
                    Type: Two-dimensional list(int).

    :return verify_matrix: The result of add_for_list for every row, in row order.
                            Type: Two-dimensional list(int).
    """
    if self.need_log:
        log.output(
            log.NORMAL,
            str(__name__),
            str(sys._getframe().f_code.co_name),
            "Add the error correction for matrix.",
        )

    segment_bits = len(matrix[0])

    if segment_bits / 8 + self.check_size > 255:
        if self.need_log:
            log.output(
                log.WARN,
                str(__name__),
                str(sys._getframe().f_code.co_name),
                "Data length is too long, encoding and decoding will take a lot of time.",
            )
        # Remember that the length warning was already issued for the matrix,
        # so add_for_list does not repeat it for each row.
        self.length_examine = True

    self.original_size = segment_bits

    return [self.add_for_list(one_list) for one_list in matrix]
def divide_all(matrix, need_log=False):
    """
    introduction: Separate data from indexes for every row of the binary matrix.

    :param matrix: The binary matrix whose rows carry an index followed by data.
                    Type: Two-dimensional list(int).

    :param need_log: need output log.

    :returns indexs, datas: The index and the data obtained by divide() for every row, in row order.
                             Type: One-dimensional list(int), Two-dimensional list(int).
    """
    m = monitor.Monitor()

    row_count = len(matrix)
    # Number of binary digits needed to write the row count.
    index_binary_length = len(bin(row_count)) - 2

    if need_log:
        log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                   "Divide index and data from binary matrix.")

    indexs, datas = [], []
    for row, one_list in enumerate(matrix):
        if need_log:
            m.output(row, row_count)
        index, data = divide(one_list, index_binary_length)
        indexs.append(index)
        datas.append(data)

    m.restore()

    return indexs, datas
def _sequence_to_list(self, dna_sequence):
    """
    introduction: Convert one DNA sequence to one binary list.

    :param dna_sequence: One DNA sequence. The length of DNA sequence should be a multiple of 9,
                         because every 9 bases (three 3-base codons) yield one 16-bit group.
                          Type: String (or list of char).

    :return one_list: The binary list corresponding to the DNA sequence.
                       Type: One-dimensional list(int).
    """
    # The loop below consumes 9 bases per step, so the guard has to test for 9
    # (a multiple of 3 such as 12 would leave an incomplete trailing group).
    if len(dna_sequence) % 9 != 0:
        log.output(
            log.ERROR,
            str(__name__),
            str(sys._getframe().f_code.co_name),
            "The length of dna sequence should be a multiple of 9!")

    # mapping_rule[0] holds the codons, mapping_rule[1] the value at the same position.
    codons, values = self.mapping_rule[0], self.mapping_rule[1]

    one_list = []
    for start in range(0, len(dna_sequence), 9):
        # Three codons are the digits of one base-47 number.
        decimal_number = 0
        for offset in (0, 3, 6):
            codon = "".join(dna_sequence[start + offset:start + offset + 3])
            decimal_number = decimal_number * 47 + values[codons.index(codon)]

        one_list += [int(bit) for bit in bin(decimal_number)[2:].zfill(16)]

    return one_list
def add_for_matrix(self, matrix):
    """
    introduction: Add Hamming error correction for every row of the origin matrix.

    :param matrix: Origin matrix. The data of this matrix contains only 0 or 1 (non-char).
                    Type: Two-dimensional list(int).

    :return verity_matrix: The result of add_for_list for every row, in row order.
                            Type: Two-dimensional list(int).
    """
    if self.need_log:
        log.output(
            log.NORMAL,
            str(__name__),
            str(sys._getframe().f_code.co_name),
            "Add the error correction for matrix.",
        )

    # Smallest number r of detection sites with data_length + r + 1 <= 2 ** r,
    # computed from the first row and used for all rows.
    data_length = len(matrix[0])
    detect_site_length = 0
    while data_length + detect_site_length + 1 > 2 ** detect_site_length:
        detect_site_length += 1

    return [self.add_for_list(one_list, detect_site_length) for one_list in matrix]
def write_dna_file(path, dna_sequences, need_log=False):
    """
    introduction: Writing DNA sequence set to documents, one sequence per line.

    :param path: File path.
                  Type: string

    :param dna_sequences: Generated DNA sequences; every entry is joined into one line.
                           Type: list of string (or list of list(char))

    :param need_log: choose to output log file or not.

    :return dna_sequences: The sequences that were passed in. When the file cannot be written
                           an error is logged and nothing is returned.
    """
    m = monitor.Monitor()

    try:
        with open(path, "w") as file:
            if need_log:
                log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                           "Write DNA sequences to file: " + path)

            total = len(dna_sequences)
            for row, dna_sequence in enumerate(dna_sequences):
                if need_log:
                    m.output(row, total)
                file.write("".join(dna_sequence) + "\n")

        return dna_sequences
    except IOError:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The file selection operation was not performed correctly. Please execute the operation again!")
def __find_errors__(self, syndromes, length, row=None):
    """
    introduction: Find error locator polynomial with Berlekamp-Massey algorithm,
                  then search its zeros to obtain the error positions.

    :param syndromes: The syndrome values to process.

    :param length: Number of candidate positions that are searched for zeros.

    :param row: Row number used in the warning message only (optional).

    :return error_positions: The positions found, or None when their number does not match
                             the degree of the locator polynomial.
    """
    error_polynomial = [1]
    old_polynomial = [1]

    for index, syndrome in enumerate(syndromes):
        old_polynomial.append(0)

        # Discrepancy between the current syndrome and the locator's prediction.
        delta = syndrome
        degree = len(error_polynomial) - 1
        for position in range(1, degree + 1):
            delta ^= self.__galois_field_multiply__(
                error_polynomial[degree - position],
                syndromes[index - position],
            )

        if delta == 0:
            continue

        if len(old_polynomial) > len(error_polynomial):
            scaled_polynomial = self.__galois_field_scale__(old_polynomial, delta)
            old_polynomial = self.__galois_field_scale__(
                error_polynomial, self.__galois_field_division__(1, delta))
            error_polynomial = scaled_polynomial

        error_polynomial = self.__galois_field_add__(
            error_polynomial, self.__galois_field_scale__(old_polynomial, delta))

    errors = len(error_polynomial) - 1
    if errors * 2 > len(syndromes):
        if row is None:
            message = "Too many erasures to correct!"
        else:
            message = "Row " + str(row) + " has too many erasures to correct!"
        log.output(
            log.WARN,
            str(__name__),
            str(sys._getframe().f_code.co_name),
            message,
        )

    # find zeros of error polynomial
    error_positions = []
    for index in range(length):
        value = self.__galois_field_evaluate__(error_polynomial, self.galois_field_exp[255 - index])
        if value == 0:
            error_positions.append(length - 1 - index)

    if len(error_positions) != errors:
        # couldn't find error locations
        return None

    return error_positions
def encode(method, input_path, output_path, model_path=None, verify=None,
           need_index=True, segment_length=120, need_log=False):
    """
    introduction: Use the selected method, convert the binary file to DNA sequence set
                  and output the DNA sequence set.

    :param method: Method under folder "methods/".
                    Type: Object.

    :param input_path: The path of binary file you need to convert.
                        Type: String.

    :param output_path: The path of the DNA sequence set file to write.
                         Type: String.

    :param model_path: The path of model file if you want to save the method after encoding.
                        Type: String

    :param verify: Error correction method under "methods/verifies/".
                    Type: Object.

    :param need_index: Declare whether the binary sequence indexes are required in the DNA sequences.
                        Type: bool.

    :param segment_length: The cut length of DNA sequence.
                           Considering current DNA synthesis factors, we usually set 120 bases as a sequence.

    :param need_log: Show the log.
    """
    if not input_path:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The input file path is invalid!")

    # The output check must look at output_path itself; testing input_path here
    # let an empty output path slip through.
    if not output_path:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The output file path is invalid!")

    input_matrix, size = data_handle.read_binary_from_all(input_path, segment_length, need_log)

    # Error correction is added first, so the index added afterwards is not protected by it.
    if verify is not None:
        input_matrix = verify.add_for_matrix(input_matrix, need_log)

    if need_index:
        input_matrix = index_operator.connect_all(input_matrix, need_log)

    dna_sequences = method.encode(input_matrix, size, need_log)

    if model_path is not None:
        saver.save_model(model_path, method)

    data_handle.write_dna_file(output_path, dna_sequences, need_log)
def encode(self, matrix, size, need_log=False):
    """
    introduction: Compress every row of the binary matrix with the huffman dictionary
                  and convert it to one DNA sequence.

    :param matrix: Generated binary two-dimensional matrix.
                   The data of this matrix contains only 0 or 1 (non-char).
                    Type: Two-dimensional list(int).

    :param size: File size; it is stored on the codec and handed back by decode.
                  Type: int

    :param need_log: show the log.

    :return dna_sequences: One converted sequence per matrix row, in row order.
    """
    self.file_size = size
    self.segment_length = len(matrix[0])

    remainder = self.segment_length % 8
    if remainder != 0:
        # NOTE(review): this prepends `remainder` zeros, not `8 - remainder`, so the
        # padded row is generally still not a multiple of 8 -- confirm the intent.
        # decode() strips the same count, so both sides must change together.
        padding = [0] * remainder
        matrix = [padding + one_list for one_list in matrix]

    self.m.restore()
    if need_log:
        log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                   "Generate the huffman dictionary.")

    # A fixed dictionary is built without data, otherwise it is derived from the matrix.
    if self.fixed_huffman:
        self._huffman_dict()
    else:
        self._huffman_dict(matrix)

    self.m.restore()
    if need_log:
        log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                   "Convert matrix to DNA sequence set.")

    total = len(matrix)
    dna_sequences = []
    for row, one_list in enumerate(matrix):
        if need_log:
            self.m.output(row, total)
        compressed = self._huffman_compressed(one_list)
        dna_sequences.append(self._list_to_sequence(compressed))

    self.m.restore()

    return dna_sequences
def _init_check(self):
    """
    introduction: The verification of initialization parameters.
                  A rule made only of 0 and 1 must contain exactly two of each;
                  any other rule must contain each of 0, 1, 2 and 3.
    """
    rule = self.mapping_rule

    if 0 <= min(rule) and max(rule) <= 1:
        # Binary rule: two positions map to 0 and two positions map to 1.
        valid = rule.count(0) == 2 and rule.count(1) == 2
    else:
        # Quaternary rule: every value from 0 to 3 has to be present.
        valid = all(value in rule for value in (0, 1, 2, 3))

    if not valid:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "Mapping rule is wrong!")
def encode(self, matrix, size, need_log=False):
    """
    introduction: Convert every row of the binary matrix to one DNA sequence (Grass Codec).

    :param matrix: Generated binary two-dimensional matrix.
                   The data of this matrix contains only 0 or 1 (non-char).
                   Rows whose length is not a multiple of 16 get leading zeros up to the next multiple.
                    Type: Two-dimensional list(int).

    :param size: File size; it is stored on the codec.
                  Type: int

    :param need_log: show the log.

    :return dna_sequences: One converted sequence per matrix row, in row order.
    """
    self.file_size = size
    self.segment_length = len(matrix[0])

    self.m.restore()

    remainder = self.segment_length % 16
    if remainder != 0:
        # Left-pad every row so that its length becomes a multiple of 16.
        padding = [0] * (16 - remainder)
        matrix = [padding + one_list for one_list in matrix]

    if need_log:
        log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                   "Encode the matrix by Grass Codec.")

    total = len(matrix)
    dna_sequences = []
    for row, one_list in enumerate(matrix):
        if need_log:
            self.m.output(row, total)
        dna_sequences.append(self._list_to_sequence(one_list))

    self.m.restore()

    return dna_sequences
def load_model(path, need_log=False):
    """
    introduction: Load model from file.

    :param path: The path of the pickled model file.
                  Type: .pkl

    :param need_log: choose to output log file or not.

    :return: The unpickled model. e.g. YYC.
    """
    if need_log:
        log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                   "Load model from file: " + path)

    # SECURITY: unpickling can execute arbitrary code; only load model files
    # that come from a trusted source.
    with open(path, "rb") as file:
        model = pickle.load(file)

    return model
def add_for_list(self, input_list):
    """
    introduction: Add Reed-Solomon error correction for a origin list.

    :param input_list: Origin list. The data of this list contains only 0 or 1 (non-char).
                       It is left-padded with zeros to a multiple of 8 bits before encoding.
                        Type: One-dimensional list(int).

    :return output_list: The padded data followed by check_size check symbols, as binary list.
                          Type: One-dimensional list(int).
    """
    too_long = len(input_list) / 8 + self.check_size > 255
    # Warn only if add_for_matrix has not already warned for the whole matrix.
    if too_long and self.length_examine is False and self.need_log:
        log.output(
            log.WARN,
            str(__name__),
            str(sys._getframe().f_code.co_name),
            "Data length is too long, encoding and decoding will take a lot of time.",
        )

    if self.original_size == -1:
        self.original_size = len(input_list)

    remainder = len(input_list) % 8
    if remainder != 0:
        input_list = [0] * (8 - remainder) + input_list

    message = self.__binary_to_decimal__(input_list)
    message_length = len(message)

    output_list = [0] * (message_length + self.check_size)
    output_list[:message_length] = message

    # Divide by the generator polynomial; the remainder accumulates in the tail.
    for data_index in range(message_length):
        coefficient = output_list[data_index]
        if coefficient == 0:
            continue
        for rs_index, generator_term in enumerate(self.rs_generator):
            output_list[data_index + rs_index] ^= self.__galois_field_multiply__(
                generator_term, coefficient)

    # The division overwrote the data part, put the message symbols back.
    output_list[:message_length] = message

    return self.__decimal_to_binary__(output_list)
def write_all_from_binary(path, matrix, size, need_log=False):
    """
    introduction: Writing binary matrix to document.

    :param path: File path.
                  Type: string

    :param matrix: A matrix in which each row represents a binary segment.
                   The bits of all rows are read as one continuous stream, 8 bits per byte.
                    Type: two-dimensional list(int)

    :param size: This refers to file size, to reduce redundant bits when transferring DNA to binary files.
                 Exactly this many bytes are written (fewer if the matrix holds fewer);
                 the remaining bits are treated as padding.
                  Type: int

    :param need_log: choose to output log file or not.
    """
    # The progress monitor is only ever used for logging, so build it on demand.
    m = monitor.Monitor() if need_log else None

    try:
        with open(path, "wb+") as file:
            if need_log:
                log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                           "Write file from binary matrix: " + path)

            # Change bit to byte (8 -> 1), and write a file as bytes
            bit_index = 0
            temp_byte = 0
            for row, one_list in enumerate(matrix):
                if need_log:
                    m.output(row, len(matrix))

                for bit in one_list:
                    bit_index += 1
                    temp_byte = temp_byte * 2 + bit

                    if bit_index == 8:
                        # Strictly greater than zero: with ">= 0" one extra padding
                        # byte was written after the last real byte of the file.
                        if size > 0:
                            file.write(struct.pack("B", int(temp_byte)))
                            size -= 1
                        bit_index = 0
                        temp_byte = 0
    except IOError:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The file selection operation was not performed correctly. Please execute the operation again!")
def decode(self, dna_sequences, need_log=False):
    """
    introduction: Convert every DNA sequence to a binary list and decompress it
                  with the huffman dictionary.

    :param dna_sequences: The DNA sequences, one per matrix row.
                           Type: One-dimensional list(string).

    :param need_log: show the log.

    :return matrix: The binary matrix corresponding to the dna sequences.
                     Type: Two-dimensional list(int).

    :return file_size: The file size stored on the codec.
                        Type: int
    """
    self.m.restore()

    if need_log:
        log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                   "Convert DNA sequences to binary matrix.")

    total = len(dna_sequences)
    # Number of binary digits needed to write the sequence count.
    index_binary_length = len(bin(total)) - 2

    matrix = []
    for index, dna_sequence in enumerate(dna_sequences):
        if need_log:
            self.m.output(index, total)
        compressed = self._sequence_to_list(dna_sequence)
        matrix.append(self._huffman_decompressed(compressed, index_binary_length))

    if len(matrix[0]) != self.segment_length:
        # NOTE(review): drops `segment_length % 8` leading bits, mirroring the padding
        # count used by encode(); confirm both sides if the padding rule is changed.
        offset = self.segment_length % 8
        matrix = [one_list[offset:] for one_list in matrix]

    self.m.restore()

    return matrix, self.file_size
def save_model(path, model, need_log=False):
    """
    introduction: Save model to file with pickle.

    :param path: The path of file, it is created or overwritten.
                 Usually in the models directory.

    :param model: Current model for encoding.
                   Type: .pkl
                   e.g. YYC.

    :param need_log: choose to output log file or not.
    """
    if need_log:
        message = "Save model to file: " + path
        log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name), message)

    with open(path, "wb") as model_file:
        pickle.dump(model, model_file)
def _list_to_sequence(self, one_list):
    """
    introduction: from one binary list to DNA sequence.

    :param one_list: One binary list.
                      Type: One-dimensional list(int).

    :return dna_sequence: One DNA sequence.
                           Type: List(char).
    """
    dna_sequence = []

    if 3 in self.mapping_rule:
        # unlimited mapping rule: every pair of bits selects one base.
        if len(one_list) % 2 != 0:
            log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                       "Data length cannot be odd number!")

        for index in range(0, len(one_list), 2):
            value = one_list[index] * 2 + one_list[index + 1]
            dna_sequence.append(inherent.index_base.get(self.mapping_rule.index(value)))

        return dna_sequence

    # Otherwise each bit selects one base among the positions mapped to it.
    for bit in one_list:
        options = [position for position, value in enumerate(self.mapping_rule) if value == bit]
        sliding_window = dna_sequence[-3:]

        if len(sliding_window) != 3 or len(set(sliding_window)) != 1:
            dna_sequence.append(inherent.index_base.get(random.choice(options)))
            continue

        # The last three bases are identical: take the first candidate that
        # differs from them, so the run is not extended.
        bases = [inherent.index_base.get(option) for option in options]
        for base in bases:
            if base != sliding_window[0]:
                dna_sequence.append(base)
                break

    return dna_sequence
def verify_for_matrix(self, verity_matrix):
    """
    introduction: Verify the correctness of the matrix and repair the error information to a certain extent.

    :param verity_matrix: Matrix waiting for validation.
                           Type: Two-dimensional list(int).

    :return matrix: The result of verify_for_list(row data, row number) for every row, in row order.
                     Type: Two-dimensional list(int).
    """
    if self.need_log:
        log.output(
            log.NORMAL,
            str(__name__),
            str(sys._getframe().f_code.co_name),
            "Verify and repair the matrix.",
        )

    return [self.verify_for_list(one_list, row) for row, one_list in enumerate(verity_matrix)]
def remove_for_matrix(self, verity_matrix):
    """
    introduction: Remove Hamming error correction from every row of the verifiable matrix.

    :param verity_matrix: Verifiable matrix. The data of this matrix contains only 0 or 1 (non-char).
                           Type: Two-dimensional list(int).

    :return matrix: The result of remove_for_list for every row, in row order.
                     Type: Two-dimensional list(int).
    """
    if self.need_log:
        log.output(
            log.NORMAL,
            str(__name__),
            str(sys._getframe().f_code.co_name),
            "Remove the error correction for matrix.",
        )

    return [self.remove_for_list(one_list) for one_list in verity_matrix]
def decode(method=None, model_path=None, input_path=None, output_path=None,
           verify=None, has_index=True, need_log=False):
    """
    introduction: Use the selected method, convert DNA sequence set to the binary file
                  and output the binary file.

    :param method: Method under folder "methods/".
                   If you have model file, you can use this function without method.
                    Type: Object.

    :param model_path: The path of model file; when given, the method is loaded from it.
                        Type: String

    :param input_path: The path of DNA sequence set you need to convert.
                        Type: String.

    :param output_path: The path of binary file consistent with previous documents.
                         Type: String.

    :param verify: Error correction method under "methods/verifies/".
                    Type: Object.

    :param has_index: Declare whether the DNA sequences contain binary sequence indexes.
                       Type: bool.

    :param need_log: Show the log.
    """
    if method is None and model_path is None:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The method you select does not exist!")
        return

    if not input_path:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The input file path is not valid!")

    # The output check must look at output_path itself; testing input_path here
    # let an empty output path slip through.
    if not output_path:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The output file path is not valid!")

    # A model file takes precedence over the method passed in.
    if model_path is not None:
        method = saver.load_model(model_path)

    dna_sequences = data_handle.read_dna_file(input_path, need_log)

    output_matrix, size = method.decode(dna_sequences, need_log)

    # Undo the encoding steps in reverse order: index first, then error correction.
    if has_index:
        indexes, data_set = index_operator.divide_all(output_matrix, need_log)
        output_matrix = index_operator.sort_order(indexes, data_set, need_log)

    if verify is not None:
        output_matrix = verify.verify_for_matrix(output_matrix, need_log)
        output_matrix = verify.remove_for_matrix(output_matrix, need_log)

    data_handle.write_all_from_binary(output_path, output_matrix, size, need_log)
def read_binary_from_all(path, segment_length=120, need_log=False):
    """
    introduction: Reading binary matrix from document.

    :param path: File path.
                  Type: string

    :param segment_length: The binary segment length used for DNA sequence generation.
                           Considering current DNA synthesis technique limitation,
                           we usually set 120 as default segment length.

    :param need_log: choose to output log file or not.

    :return matrix: A matrix in which each row represents a binary segment that will be used for
                    DNA sequence generation; the unused tail of the last row stays zero.
                     Type: two-dimensional list(int)

    :return size: The size of the file in bytes.
                   Type: int
    """
    m = monitor.Monitor()

    try:
        # Open selected file
        with open(path, mode="rb") as file:
            if need_log:
                log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                           "Read binary matrix from file: " + path)

            size = os.path.getsize(path)

            # Set init storage matrix
            row_count = math.ceil(size * 8 / segment_length)
            matrix = [[0] * segment_length for _ in range(row_count)]

            row, col = 0, 0
            for byte_index in range(size):
                if need_log:
                    m.output(byte_index, size)

                # Read a file as bytes
                one_byte = file.read(1)
                value = struct.unpack("B", one_byte)[0]
                element = [int(bit) for bit in bin(value)[2:].zfill(8)]

                # Bits fill the matrix row by row, most significant bit first.
                for bit in element:
                    matrix[row][col] = bit
                    col += 1
                    if col == segment_length:
                        col = 0
                        row += 1

            index_length = len(bin(len(matrix))) - 2
            if index_length * 7 > segment_length and need_log:
                log.output(log.WARN, str(__name__), str(sys._getframe().f_code.co_name),
                           "The proportion of index in whole sequence may be high. \n"
                           "It is recommended to increase the length of output DNA sequences "
                           "or to divide the file into more segment pools")

            return matrix, size
    except IOError:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The file selection operation was not performed correctly. Please execute the operation again!")
def _init_check(self):
    """
    introduction: The verification of initialization parameters.
                  Logs an error for a negative "redundancy", a negative "header_size"
                  and a "gc_content" outside [0, 0.5].
    """
    function_name = str(sys._getframe().f_code.co_name)

    if self.redundancy < 0:
        log.output(
            log.ERROR, str(__name__), function_name,
            "The parameter \"redundancy\" is wrong, it is greater than or equal to 0!"
        )

    if self.header_size < 0:
        log.output(
            log.ERROR, str(__name__), function_name,
            "The parameter \"header_size\" is wrong, it is greater than or equal to 0!"
        )

    if self.gc_content < 0 or self.gc_content > 0.5:
        log.output(
            log.ERROR, str(__name__), function_name,
            "The parameter \"gc_content\" is wrong, it is in the [0, 0.5]!"
        )
def decode(self, dna_sequences, need_log=False):
    """
    introduction: Decode DNA sequences to the data of binary file (Fountain Codec).

    :param dna_sequences: The DNA sequences; each one is read as a droplet.
                           Type: One-dimensional list(string).

    :param need_log: Show the log.

    :return matrix: The binary matrix with "decode_packets" rows; a row that could not be
                    recovered stays None (an error is logged in that case).
                     Type: Two-dimensional list(int).

    :return file_size: The file size stored on the codec.
                        Type: int
    """
    self.monitor.restore()

    # adjust the maximum recursion depth to "self.recursion_depth" in Python.
    sys.setrecursionlimit(self.recursion_depth)

    if self.decode_packets is None:
        log.output(
            log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
            "We miss the parameter \"decode_packets\", please try again after inputting this parameter."
        )

    if need_log:
        log.output(
            log.WARN, str(__name__), str(sys._getframe().f_code.co_name),
            "If we get the system crash named -1073741571(0xC00000FD), "
            "it is caused by the excessive function (_update_droplets) recursive calls.\n"
            "Please reduce the hyper-parameter \"redundancy\" or split the original digital file"
            " in the encoding process.")
        log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                   "Decode the matrix by Fountain Codec.")

    # creating the solition distribution object
    self.prng = PRNG(K=self.decode_packets, delta=self.delta, c=self.c_dist)

    matrix = [None] * self.decode_packets
    done_segments = set()
    # chunk number -> droplets that contain this chunk
    chunk_to_droplets = defaultdict(set)

    for dna_sequence in dna_sequences:
        droplet = Droplet()
        droplet.init_binaries(self.prng, dna_sequence, self.header_size)

        for chunk_num in droplet.chuck_indices:
            chunk_to_droplets[chunk_num].add(droplet)

        self._update_droplets(droplet, matrix, done_segments, chunk_to_droplets)

        if need_log:
            self.monitor.output(len(done_segments), self.decode_packets)

    incomplete = None in matrix or self.decode_packets - len(done_segments) > 0
    if incomplete:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "Couldn't decode the whole file.")

    self.monitor.restore()

    return matrix, self.file_size
def encode(self, matrix, size, need_log=False):
    """
    introduction: Encode DNA sequences from the data of binary file (Fountain Codec).

    :param matrix: Generated binary two-dimensional matrix.
                   The data of this matrix contains only 0 or 1 (non-char).
                   An error is logged when the row length is odd.
                    Type: Two-dimensional list(int).

    :param size: File size; it is stored on the codec.
                  Type: int

    :param need_log: Show the log.

    :return dna_sequences: ceil(len(matrix) * (1 + redundancy)) DNA sequences that passed
                           the homopolymer and GC content checks.
    """
    self.file_size = size

    self.monitor.restore()

    if len(matrix[0]) % 2 == 1:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "Binary sequence length should be even.")

    if need_log:
        log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                   "Encode the matrix by Fountain Codec.")

    # calculate decode packets
    packet_count = len(matrix)
    self.decode_packets = packet_count

    final_count = math.ceil(packet_count * (1 + self.redundancy))

    # things related to random number generator, starting an lfsr with a certain state
    # and a polynomial for 32bits.
    lfsr = LFSR().lfsr_s_p()

    # creating the solition distribution object
    self.prng = PRNG(K=packet_count, delta=self.delta, c=self.c_dist)

    # NOTE(review): nothing is ever stored in used_bc, so the seed check below can
    # never skip a seed -- confirm whether accepted seeds were meant to be recorded.
    used_bc = dict()

    dna_sequences = []
    while len(dna_sequences) < final_count:
        seed = next(lfsr)
        if seed in used_bc:
            continue

        # initialize droplet and trans-code to DNA.
        droplet = Droplet()
        dna_sequence = droplet.get_dna(seed, self.prng, matrix, self.header_size)

        # check validity.
        joined_sequence = "".join(dna_sequence)
        if validity.homopolymer(joined_sequence, self.homopolymer) \
                and validity.cg_content(joined_sequence, 0.5 + self.gc_content):
            dna_sequences.append(dna_sequence)

        if need_log:
            self.monitor.output(len(dna_sequences), final_count)

    if need_log:
        log.output(
            log.WARN, str(__name__), str(sys._getframe().f_code.co_name),
            "Fountain codes for which the inputted matrix is of full rank in the decoding process are "
            "decodable, the full rank depends on the hyper-parameter \"redundancy\" in the Fountain Codec.\n"
            "Therefore, we strongly recommend that we decode it directly to verify the decodable "
            "of the DNA file before conducting DNA synthesis experiments.")

    self.monitor.restore()

    return dna_sequences
def verify_for_list(self, input_list, row=None):
    """
    introduction: Verify the correctness of the list with Reed-Solomon check symbols
                  and repair the error information to a certain extent.

    :param input_list: Verifiable list. The data of this list contains only 0 or 1 (non-char).
                        Type: One-dimensional list(int).

    :param row: The number of the matrix row to which the list belongs (used in log messages only).

    :return output_list: List that has been verified even repaired (check symbols still included).
                          Type: One-dimensional list(int).
    """
    if row is None:
        if self.need_log:
            log.output(
                log.NORMAL,
                str(__name__),
                str(sys._getframe().f_code.co_name),
                "Verify and repair the list.",
            )

    output_list = self.__binary_to_decimal__(input_list)

    # find erasures: negative symbols mark erased positions and are zeroed.
    erasure_positions = []
    for index, symbol in enumerate(output_list):
        if symbol < 0:
            output_list[index] = 0
            erasure_positions.append(index)

    if len(erasure_positions) > self.check_size:
        if row is not None:
            message = "Row " + str(row) + " has too many erasures to correct!"
        else:
            message = "Too many erasures to correct!"
        log.output(
            log.ERROR,
            str(__name__),
            str(sys._getframe().f_code.co_name),
            message,
        )

    syndromes = [
        self.__galois_field_evaluate__(output_list, self.galois_field_exp[i])
        for i in range(self.check_size)
    ]

    # All syndromes zero: the symbols are consistent with the check symbols.
    if max(syndromes) == 0:
        return self.__decimal_to_binary__(output_list)

    forney_syndromes = self.__forney_syndromes__(syndromes, erasure_positions, len(output_list))
    error_positions = self.__find_errors__(forney_syndromes, len(output_list), row)

    # No error locations found: fall back to correcting the erasures only.
    if error_positions is None:
        error_positions = []

    output_list = self.__correct_errata__(
        output_list, syndromes, erasure_positions + error_positions)

    # Re-evaluate the syndromes to find out whether the repair succeeded.
    remaining = max(
        self.__galois_field_evaluate__(output_list, self.galois_field_exp[i])
        for i in range(self.check_size)
    )
    if remaining > 0:
        if row is not None:
            message = "Row " + str(row) + " could not be correct!"
        else:
            message = "Could not be correct!"
        log.output(
            log.WARN,
            str(__name__),
            str(sys._getframe().f_code.co_name),
            message,
        )

    return self.__decimal_to_binary__(output_list)
def verify_for_list(self, input_list, row=None):
    """
    introduction: Verify the correctness of the list with its Hamming parity bits
                  and repair a single flipped bit.

    :param input_list: The binary list requiring validation. It is not modified.
                        Type: One-dimensional list(int).

    :param row: The number of the matrix row to which the list belongs (used in log messages only).

    :return output_list: input_list itself when no error is detected or when the error cannot be
                         located (a warning is logged); otherwise a repaired copy.
                          Type: One-dimensional list(int).
    """
    if row is None:
        if self.need_log:
            log.output(
                log.NORMAL,
                str(__name__),
                str(sys._getframe().f_code.co_name),
                "Verify and repair the list.",
            )

    # Positions are counted from the end of the list (1-indexed). Work on a
    # reversed copy so the caller's list is never left reversed.
    reversed_list = input_list[::-1]
    length = len(reversed_list)

    # Syndrome: the parity at position 2**k is the XOR of all positions whose
    # number has bit k set; together the parities spell the erroneous position.
    error = 0
    detect_site = 0
    while pow(2, detect_site) <= length:
        span = pow(2, detect_site)
        parity = 0
        for start in range(span - 1, length, span * 2):
            for bit in reversed_list[start:start + span]:
                parity ^= bit
        error += int(parity) * span
        detect_site += 1

    if error == 0:
        return input_list

    # Position `length` is the last valid one, so only strictly larger values are
    # impossible locations (">=" made a flipped first bit unrepairable).
    if error > length:
        log.output(
            log.WARN,
            str(__name__),
            str(sys._getframe().f_code.co_name),
            "Multiple errors can be detected, but due to the limitation of error-correction settings, the errors cannot be located.",
        )
        return input_list

    if self.need_log:
        if row is not None:
            log.output(
                log.WARN,
                str(__name__),
                str(sys._getframe().f_code.co_name),
                "Error is No. " + str(length - error) + "bit, in " + str(row + 1) + " of matrix, and it is repaired.",
            )
        else:
            log.output(
                log.WARN,
                str(__name__),
                str(sys._getframe().f_code.co_name),
                "Error is No. " + str(length - error) + "bit, and it is repaired.",
            )

    # Flip the located bit, then restore the original bit order.
    reversed_list[error - 1] = 1 if reversed_list[error - 1] == 0 else 0
    reversed_list.reverse()

    return reversed_list