Exemplo n.º 1
0
def sort_order(indexes, data_set, need_log=False):
    """
    introduction: Restore data in order of index.

    :param indexes: The indexes of data set.
                    indexes[i] is the row that data_set[i] occupies in the result.
                    Type: One-dimensional list(int).

    :param data_set: The disordered data set, the locations of this are corresponding to parameter "index".
                     Type: Two-dimensional list(int).

    :param need_log: need output log.

    :returns matrix: Binary list in correct order.
                      It has len(indexes) rows; a row whose index lies outside
                      [0, len(indexes)) is dropped, and a row that no index
                      points to stays all-zero.
                      Type: Two-dimensional list(int).
    """
    m = monitor.Monitor()

    if need_log:
        log.output(log.NORMAL, str(__name__),
                   str(sys._getframe().f_code.co_name),
                   "Restore data order according to index.")

    row_count = len(indexes)
    # An empty data set has no first row to measure; fall back to zero width.
    width = len(data_set[0]) if data_set else 0
    matrix = [[0] * width for _ in range(row_count)]

    for position, target in enumerate(indexes):
        if need_log:
            m.output(position, row_count)
        # Guard the *target* row: a corrupted index may point outside the
        # matrix (or be negative, which would silently wrap around).
        if 0 <= target < row_count:
            matrix[target] = data_set[position]

    m.restore()

    return matrix
Exemplo n.º 2
0
def read_dna_file(path, need_log=False):
    """
    introduction: Reading DNA sequence set from documents.

    :param path: File path.
                  Type: string

    :param need_log: need output log.

    :return dna_sequences: The DNA sequences of the file, one per line, each
                           split into a list of single characters with the
                           line terminator removed.
                           Nothing is returned explicitly when the file cannot
                           be read and log.output(log.ERROR, ...) returns.
                           Type: two-dimensional list(char)
    """

    m = monitor.Monitor()

    dna_sequences = []

    try:
        with open(path, "r") as file:
            if need_log:
                log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                           "Read DNA sequences from file: " + path)

            # Read current file by line
            lines = file.readlines()
            line_count = len(lines)
            for index, line in enumerate(lines):
                if need_log:
                    m.output(index, line_count)
                # Strip only the newline; the final line of a file may have
                # none, and blindly cutting one character would lose a base.
                dna_sequences.append(list(line.rstrip("\n")))

        return dna_sequences
    except IOError:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The file selection operation was not performed correctly. Please execute the operation again!")
Exemplo n.º 3
0
    def encode(self, matrix, size, need_log=False):
        """
        introduction: Turn every row of a binary matrix into one DNA sequence.

        :param matrix: Binary two-dimensional matrix (values 0 or 1, non-char).
                        Type: Two-dimensional list(int).

        :param size: File size; it is stored on the instance as file_size so
                     that decode() can hand it back.
                      Type: int

        :param need_log: Show the log and the progress monitor.

        :return dna_sequences: One converted sequence per row of matrix, in
                               the same order, as produced by
                               self._list_to_sequence.
        """
        self.file_size = size
        self.m.restore()

        if need_log:
            log.output(log.NORMAL, str(__name__),
                       str(sys._getframe().f_code.co_name),
                       "Encode the matrix by Simple Codec.")

        total_rows = len(matrix)
        dna_sequences = []
        for position, binary_row in enumerate(matrix):
            if need_log:
                self.m.output(position, total_rows)
            dna_sequences.append(self._list_to_sequence(binary_row))

        return dna_sequences
Exemplo n.º 4
0
    def decode(self, dna_sequences, need_log=False):
        """
        introduction: Turn every DNA sequence back into one binary row.

        :param dna_sequences: The DNA sequences, one per matrix row.
                              Type: One-dimensional list(string).

        :param need_log: Show the log and the progress monitor.

        :return matrix: One row per sequence, in the same order, as produced
                         by self._sequence_to_list.
                         Type: Two-dimensional list(int).

        :return file_size: The value currently stored in self.file_size.
                            Type: int
        """
        self.m.restore()

        if need_log:
            log.output(
                log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                "Convert DNA sequences to binary matrix by Simple Codec.")

        total_sequences = len(dna_sequences)
        matrix = []
        for position, sequence in enumerate(dna_sequences):
            if need_log:
                self.m.output(position, total_sequences)
            matrix.append(self._sequence_to_list(sequence))

        self.m.restore()
        return matrix, self.file_size
Exemplo n.º 5
0
def connect_all(matrix, need_log=False):
    """
    introduction: Prefix every row of the matrix with its own row number.

    :param matrix: Data from input.
                   Type: Two-dimensional list(int).

    :param need_log: Show the log and the progress monitor.

    :return new_matrix: One entry per input row, each built by
                        connect(row_number, row, index_binary_length).
                        Type: Two-dimensional list(int).
    """
    m = monitor.Monitor()

    row_total = len(matrix)
    # Number of binary digits of the row count, i.e. len("0b...") minus "0b".
    index_binary_length = len(bin(row_total)) - 2

    if need_log:
        log.output(log.NORMAL, str(__name__),
                   str(sys._getframe().f_code.co_name),
                   "Add index in the binary matrix.")

    new_matrix = []
    for row_number, payload in enumerate(matrix):
        if need_log:
            m.output(row_number, row_total)
        new_matrix.append(connect(row_number, payload, index_binary_length))

    m.restore()

    return new_matrix
Exemplo n.º 6
0
    def add_for_matrix(self, matrix):
        """
        introduction: Add Reed-Solomon error correction to every row of a matrix.

        The width of the first row is remembered in self.original_size and
        self.length_examine is set to True before the rows are processed.

        :param matrix: Origin matrix.
                       The data of this matrix contains only 0 or 1 (non-char).
                       Type: Two-dimensional list(int).

        :return verify_matrix: One self.add_for_list(row) result per input row.
                               Type: Two-dimensional list(int).
        """
        if self.need_log:
            log.output(
                log.NORMAL,
                str(__name__),
                str(sys._getframe().f_code.co_name),
                "Add the error correction for matrix.",
            )

        row_width = len(matrix[0])

        # Warn once here, based on the first row, when data bytes plus check
        # bytes exceed 255.
        if row_width / 8 + self.check_size > 255 and self.need_log:
            log.output(
                log.WARN,
                str(__name__),
                str(sys._getframe().f_code.co_name),
                "Data length is too long, encoding and decoding will take a lot of time.",
            )

        self.length_examine = True
        self.original_size = row_width

        return [self.add_for_list(current_row) for current_row in matrix]
Exemplo n.º 7
0
def divide_all(matrix, need_log=False):
    """
    introduction: Split every row of the matrix into its index and its data.

    :param matrix: Rows that carry an index in front of the data.
                   Type: Two-dimensional list(int).

    :param need_log: Show the log and the progress monitor.

    :returns indexs, datas: The two values that divide(row, index_binary_length)
                            yields for each row, collected in row order.
                            Type: One-dimensional list(int), Two-dimensional list(int).
    """
    m = monitor.Monitor()

    row_total = len(matrix)
    # Number of binary digits of the row count, i.e. len("0b...") minus "0b".
    index_binary_length = len(bin(row_total)) - 2

    if need_log:
        log.output(log.NORMAL, str(__name__),
                   str(sys._getframe().f_code.co_name),
                   "Divide index and data from binary matrix.")

    indexs, datas = [], []

    for row_number, combined in enumerate(matrix):
        if need_log:
            m.output(row_number, row_total)
        row_index, row_data = divide(combined, index_binary_length)
        indexs.append(row_index)
        datas.append(row_data)

    m.restore()

    return indexs, datas
Exemplo n.º 8
0
    def _sequence_to_list(self, dna_sequence):
        """
        introduction: Convert one DNA sequence to one binary list.

        :param dna_sequence: One DNA sequence.
                           The length of DNA sequence should be a multiple of 9.
                           Type: String.

        :return one_list: The binary list corresponding to the DNA sequence.
                           Type: One-dimensional list(int).
        """

        if len(dna_sequence) % 3 != 0:
            log.output(
                log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                "The length of dna sequence should be a multiple of 9!")

        one_list = []

        for index in range(0, len(dna_sequence), 9):
            first = self.mapping_rule[1][self.mapping_rule[0].index("".join(
                dna_sequence[index:index + 3]))]
            second = self.mapping_rule[1][self.mapping_rule[0].index("".join(
                dna_sequence[index + 3:index + 6]))]
            third = self.mapping_rule[1][self.mapping_rule[0].index("".join(
                dna_sequence[index + 6:index + 9]))]

            decimal_number = first
            decimal_number = decimal_number * 47 + second
            decimal_number = decimal_number * 47 + third

            one_list += list(
                map(int, list(str(bin(decimal_number))[2:].zfill(16))))

        return one_list
Exemplo n.º 9
0
    def add_for_matrix(self, matrix):
        """
        introduction: Add Hamming error correction to every row of a matrix.

        :param matrix: Origin matrix.
                       The data of this matrix contains only 0 or 1 (non-char).
                       Type: Two-dimensional list(int).

        :return verity_matrix: One self.add_for_list(row, detect_site_length)
                               result per input row.
                               Type: Two-dimensional list(int).
        """
        if self.need_log:
            log.output(
                log.NORMAL,
                str(__name__),
                str(sys._getframe().f_code.co_name),
                "Add the error correction for matrix.",
            )

        # Smallest r with data_length + r + 1 <= 2 ** r, measured on the
        # first row and reused for all rows.
        data_length = len(matrix[0])
        detect_site_length = 0
        while data_length + detect_site_length + 1 > 2 ** detect_site_length:
            detect_site_length += 1

        return [
            self.add_for_list(current_row, detect_site_length)
            for current_row in matrix
        ]
Exemplo n.º 10
0
def write_dna_file(path, dna_sequences, need_log=False):
    """
    introduction: Write a set of DNA sequences to a text file, one per line.

    :param path: File path; the file is opened in "w" mode.
                  Type: string

    :param dna_sequences: Generated DNA sequences; each one is joined with
                          "".join before being written.
                          Type: list of strings or of lists of characters

    :param need_log: Show the log and the progress monitor.

    :return dna_sequences: The input list, returned unchanged once the file
                           has been written.
    """

    m = monitor.Monitor()

    try:
        with open(path, "w") as file:
            if need_log:
                log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                           "Write DNA sequences to file: " + path)

            total_sequences = len(dna_sequences)
            for position, sequence in enumerate(dna_sequences):
                if need_log:
                    m.output(position, total_sequences)
                file.write("".join(sequence) + "\n")

        return dna_sequences
    except IOError:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The file selection operation was not performed correctly. Please execute the operation again!")
Exemplo n.º 11
0
    def __find_errors__(self, syndromes, length, row=None):
        """
        introduction: Find error locator polynomial with Berlekamp-Massey algorithm.

        The polynomial is built up from the syndromes, then its zeros are
        searched over `length` candidate positions.

        :param syndromes: Sequence of syndrome values; it is indexed by integer
                          and combined with XOR, so the items must be ints.
                          NOTE(review): presumably Galois-field GF(2^8) symbols
                          - confirm against the caller.

        :param length: Number of candidate positions that are tested when
                       looking for zeros of the polynomial.
                       Type: int

        :param row: Optional row number; it is only used to build the warning
                    message.

        :return error_positions: List of positions (each `length - 1 - index`)
                                 at which the polynomial evaluates to 0, or
                                 None when the number of zeros found differs
                                 from len(error_polynomial) - 1.
        """
        error_polynomial = [1]
        old_polynomial = [1]
        for index in range(0, len(syndromes)):
            # The previous polynomial gains one trailing coefficient per step.
            old_polynomial.append(0)
            # Discrepancy: current syndrome XOR-ed with the products of the
            # polynomial coefficients and the preceding syndromes.
            delta = syndromes[index]
            for position in range(1, len(error_polynomial)):
                delta ^= self.__galois_field_multiply__(
                    error_polynomial[len(error_polynomial) - 1 - position],
                    syndromes[index - position],
                )
            if delta != 0:
                # Non-zero discrepancy: the polynomial has to be corrected.
                if len(old_polynomial) > len(error_polynomial):
                    # The old polynomial is longer: swap roles, scaling the
                    # old one by delta and the current one by 1 / delta.
                    new_polynomial = self.__galois_field_scale__(
                        old_polynomial, delta)
                    old_polynomial = self.__galois_field_scale__(
                        error_polynomial,
                        self.__galois_field_division__(1, delta))
                    error_polynomial = new_polynomial
                error_polynomial = self.__galois_field_add__(
                    error_polynomial,
                    self.__galois_field_scale__(old_polynomial, delta))

        # The polynomial length minus one is taken as the number of errors.
        errors = len(error_polynomial) - 1
        if errors * 2 > len(syndromes):
            # Only a warning is logged here; the search below still runs.
            if row is not None:
                log.output(
                    log.WARN,
                    str(__name__),
                    str(sys._getframe().f_code.co_name),
                    "Row " + str(row) + " has too many erasures to correct!",
                )
            else:
                log.output(
                    log.WARN,
                    str(__name__),
                    str(sys._getframe().f_code.co_name),
                    "Too many erasures to correct!",
                )
        # find zeros of error polynomial
        error_positions = []
        for index in range(length):
            # Evaluate at galois_field_exp[255 - index]; a zero marks the
            # position counted from the end of the message.
            if (self.__galois_field_evaluate__(
                    error_polynomial,
                    self.galois_field_exp[255 - index]) == 0):
                error_positions.append(length - 1 - index)
        if len(error_positions) != errors:
            # couldn't find error locations
            return None
        return error_positions
Exemplo n.º 12
0
def encode(method, input_path, output_path,
           model_path=None, verify=None, need_index=True, segment_length=120, need_log=False):
    """
    introduction: Use the selected method, convert the binary file to DNA sequence
                  set and output the DNA sequence set.

    The steps are: read the binary file as a bit matrix, optionally add error
    correction, optionally add row indexes, encode with the method, optionally
    save the method as a model, and write the DNA sequences.

    :param method: Method under folder "methods/".
                    Type: Object.

    :param input_path: The path of binary file you need to convert.
                        Type: String.

    :param output_path: The path of the DNA sequence set to write.
                         Type: String.

    :param model_path: The path of model file if you want to save
                        Type: String

    :param verify: Error correction method under "methods/verifies/"
                    Type: Object.

    :param need_index: Declare whether the binary sequence indexes are required
                       in the DNA sequences.
                        Type: bool.

    :param segment_length: The cut length of DNA sequence.
                      Considering current DNA synthesis factors, we usually
                      set 120 bases as a sequence.

    :param need_log: Show the log.
    """

    if input_path is None or len(input_path) == 0:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The input file path is invalid!")

    # Validate the output path itself; testing len(input_path) here let an
    # empty output path through.
    if output_path is None or len(output_path) == 0:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The output file path is invalid!")

    input_matrix, size = data_handle.read_binary_from_all(input_path, segment_length, need_log)

    if verify is not None:
        input_matrix = verify.add_for_matrix(input_matrix, need_log)

    if need_index:
        input_matrix = index_operator.connect_all(input_matrix, need_log)

    dna_sequences = method.encode(input_matrix, size, need_log)

    if model_path is not None:
        saver.save_model(model_path, method)

    data_handle.write_dna_file(output_path, dna_sequences, need_log)
Exemplo n.º 13
0
    def encode(self, matrix, size, need_log=False):
        """
        introduction: Encode a binary matrix into DNA sequences via Huffman compression.

        The file size and the width of the first row are stored on the
        instance (file_size, segment_length). When the width is not a multiple
        of 8, every row is prefixed with `segment_length % 8` zeros.
        NOTE(review): that pads by the remainder, not up to the next multiple
        of 8; it is kept as is because the decoding side may rely on the same
        amount - confirm before changing.

        :param matrix: Generated binary two-dimensional matrix.
                        The data of this matrix contains only 0 or 1 (non-char).
                        Type: Two-dimensional list(int).

        :param size: This refers to file size, to reduce redundant bits when transferring DNA to binary files.
                      Type: int

        :param need_log: show the log.

        :return dna_sequences: One entry per row, built by
                               _list_to_sequence(_huffman_compressed(row)).
        """
        self.file_size = size
        self.segment_length = len(matrix[0])

        leading_zeros = self.segment_length % 8
        if leading_zeros != 0:
            matrix = [[0] * leading_zeros + binary_row for binary_row in matrix]

        self.m.restore()
        if need_log:
            log.output(log.NORMAL, str(__name__),
                       str(sys._getframe().f_code.co_name),
                       "Generate the huffman dictionary.")
        # A fixed dictionary needs no data; otherwise it is derived from the
        # (possibly padded) matrix.
        if self.fixed_huffman:
            self._huffman_dict()
        else:
            self._huffman_dict(matrix)

        self.m.restore()
        if need_log:
            log.output(log.NORMAL, str(__name__),
                       str(sys._getframe().f_code.co_name),
                       "Convert matrix to DNA sequence set.")

        total_rows = len(matrix)
        dna_sequences = []
        for position, binary_row in enumerate(matrix):
            if need_log:
                self.m.output(position, total_rows)
            compressed = self._huffman_compressed(binary_row)
            dna_sequences.append(self._list_to_sequence(compressed))

        self.m.restore()
        return dna_sequences
Exemplo n.º 14
0
 def _init_check(self):
     """
     introduction: The verification of initialization parameters.
     """
     if 0 <= min(self.mapping_rule) and max(self.mapping_rule) <= 1:
         if self.mapping_rule.count(0) != 2 or self.mapping_rule.count(
                 1) != 2:
             log.output(log.ERROR, str(__name__),
                        str(sys._getframe().f_code.co_name),
                        "Mapping rule is wrong!")
     else:
         if (0 not in self.mapping_rule) or (1 not in self.mapping_rule) \
                 or (2 not in self.mapping_rule) or (3 not in self.mapping_rule):
             log.output(log.ERROR, str(__name__),
                        str(sys._getframe().f_code.co_name),
                        "Mapping rule is wrong!")
Exemplo n.º 15
0
    def encode(self, matrix, size, need_log=False):
        """
        introduction: Encode a binary matrix into DNA sequences (Grass Codec).

        The file size and the width of the first row are stored on the
        instance (file_size, segment_length). Rows whose width is not a
        multiple of 16 are prefixed with zeros up to the next multiple of 16.

        :param matrix: Generated binary two-dimensional matrix.
                        The data of this matrix contains only 0 or 1 (non-char).
                        Type: Two-dimensional list(int).

        :param size: This refers to file size, to reduce redundant bits when transferring DNA to binary files.
                      Type: int

        :param need_log: show the log.

        :return dna_sequences: One self._list_to_sequence(row) result per
                             (padded) row, in row order.
        """

        self.file_size = size
        self.segment_length = len(matrix[0])
        self.m.restore()

        remainder = self.segment_length % 16
        if remainder != 0:
            leading_zeros = [0] * (16 - remainder)
            matrix = [leading_zeros + binary_row for binary_row in matrix]

        if need_log:
            log.output(log.NORMAL, str(__name__),
                       str(sys._getframe().f_code.co_name),
                       "Encode the matrix by Grass Codec.")

        total_rows = len(matrix)
        dna_sequences = []
        for position, binary_row in enumerate(matrix):
            if need_log:
                self.m.output(position, total_rows)
            dna_sequences.append(self._list_to_sequence(binary_row))

        self.m.restore()
        return dna_sequences
Exemplo n.º 16
0
def load_model(path, need_log=False):
    """
    introduction: Load a pickled model from a file.

    NOTE: the file is read with pickle, which can execute arbitrary code
    while loading; only open model files from a trusted source.

    :param path: The path of the model file (a pickle, e.g. ".pkl").
                  Type: String

    :param need_log: choose to output log file or not.

    :return: The object stored in the file.
              e.g. YYC.
    """
    if need_log:
        log.output(log.NORMAL, str(__name__),
                   str(sys._getframe().f_code.co_name),
                   "Load model from file: " + path)

    with open(path, "rb") as model_file:
        model = pickle.load(model_file)

    return model
Exemplo n.º 17
0
    def add_for_list(self, input_list):
        """
        introduction: Add Reed-Solomon error correction to one binary list.

        The list is zero-padded at the front to a multiple of 8 bits,
        converted with __binary_to_decimal__, extended by check_size check
        symbols computed with rs_generator, and converted back with
        __decimal_to_binary__. The unpadded input length is recorded in
        self.original_size the first time (while it still equals -1).

        :param input_list: Origin list.
                           The data of this list contains only 0 or 1 (non-char).
                           Type: One-dimensional list(int).

        :return output_list: The binary list completing processing.
                             Type: One-dimensional list(int).
        """
        exceeds_limit = len(input_list) / 8 + self.check_size > 255
        if exceeds_limit and self.length_examine is False and self.need_log:
            log.output(
                log.WARN,
                str(__name__),
                str(sys._getframe().f_code.co_name),
                "Data length is too long, encoding and decoding will take a lot of time.",
            )

        if self.original_size == -1:
            self.original_size = len(input_list)

        remainder = len(input_list) % 8
        if remainder != 0:
            input_list = [0] * (8 - remainder) + input_list

        message = self.__binary_to_decimal__(input_list)
        message_length = len(message)

        # Message symbols followed by check_size zeroed check slots.
        output_list = list(message) + [0] * self.check_size

        # Fold the generator into the buffer symbol by symbol; what is left
        # in the trailing slots afterwards are the check symbols.
        for data_index in range(message_length):
            coefficient = output_list[data_index]
            if coefficient == 0:
                continue
            for rs_index, generator_term in enumerate(self.rs_generator):
                output_list[data_index + rs_index] ^= self.__galois_field_multiply__(
                    generator_term, coefficient)

        # The loop overwrote the message part; put the message back.
        output_list[:message_length] = message
        return self.__decimal_to_binary__(output_list)
Exemplo n.º 18
0
def write_all_from_binary(path, matrix, size, need_log=False):
    """
    introduction: Writing binary matrix to document.

    The bits of all rows are read in order as one stream, grouped eight at a
    time (most significant bit first), and at most `size` bytes are written;
    the remaining bits are treated as padding and discarded.

    :param path: File path; the file is opened in "wb+" mode.
                  Type: string

    :param matrix: A matrix in which each row represents a binary segment that will be used for DNA sequence generation.
                    Type: two-dimensional list(int)

    :param size: This refers to file size, to reduce redundant bits when transferring DNA to binary files.
                  Type: int

    :param need_log: choose to output log file or not.
    """
    m = monitor.Monitor()

    try:
        with open(path, "wb+") as file:
            if need_log:
                log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                           "Write file from binary matrix: " + path)

            # Change bit to byte (8 -> 1), and write a file as bytes
            remaining_bytes = size
            bit_index = 0
            temp_byte = 0
            row_count = len(matrix)
            for row, bits in enumerate(matrix):
                if need_log:
                    m.output(row, row_count)
                for bit in bits:
                    bit_index += 1
                    temp_byte = temp_byte * 2 + bit
                    if bit_index == 8:
                        # Write exactly `size` bytes: the former `size >= 0`
                        # test emitted one extra byte whenever padding bits
                        # were available.
                        if remaining_bytes > 0:
                            file.write(struct.pack("B", int(temp_byte)))
                            remaining_bytes -= 1
                        # Always reset, so the accumulator cannot keep growing
                        # once all bytes have been written.
                        bit_index = 0
                        temp_byte = 0
    except IOError:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The file selection operation was not performed correctly. Please execute the operation again!")
Exemplo n.º 19
0
    def decode(self, dna_sequences, need_log=False):
        """
        introduction: Decode DNA sequences into a binary matrix via Huffman decompression.

        Each sequence goes through _sequence_to_list and then
        _huffman_decompressed. When the width of the first decoded row differs
        from self.segment_length, the first `segment_length % 8` bits of every
        row are removed.

        :param dna_sequences: The DNA sequences, one per matrix row.
                            Type: One-dimensional list(string).

        :param need_log: show the log.

        :return matrix: The binary matrix corresponding to the dna sequences.
                         Type: Two-dimensional list(int).

        :return file_size: The value currently stored in self.file_size.
                            Type: int
        """

        self.m.restore()
        if need_log:
            log.output(log.NORMAL, str(__name__),
                       str(sys._getframe().f_code.co_name),
                       "Convert DNA sequences to binary matrix.")

        total_sequences = len(dna_sequences)
        # Number of binary digits of the sequence count ("0b" prefix removed).
        index_binary_length = len(bin(total_sequences)) - 2

        matrix = []
        for position, sequence in enumerate(dna_sequences):
            if need_log:
                self.m.output(position, total_sequences)
            compressed = self._sequence_to_list(sequence)
            matrix.append(
                self._huffman_decompressed(compressed, index_binary_length))

        if len(matrix[0]) != self.segment_length:
            leading_bits = self.segment_length % 8
            matrix = [binary_row[leading_bits:] for binary_row in matrix]

        self.m.restore()

        return matrix, self.file_size
Exemplo n.º 20
0
def save_model(path, model, need_log=False):
    """
    introduction: Save a model to a file with pickle.

    :param path: The path of the file to write (opened in "wb" mode).
                  Usually in the models directory.

    :param model: Current model for encoding; any picklable object.
                   e.g. YYC.

    :param need_log: choose to output log file or not.
    """
    if need_log:
        log.output(log.NORMAL, str(__name__),
                   str(sys._getframe().f_code.co_name),
                   "Save model to file: " + path)

    with open(path, "wb") as model_file:
        pickle.dump(model, model_file)
Exemplo n.º 21
0
    def _list_to_sequence(self, one_list):
        """
        introduction: from one binary list to DNA sequence.

        With a mapping rule that contains 3, two bits are combined into one
        value and mapped to a single base. Otherwise each bit is mapped to
        one of the positions in the rule that hold this bit: a random one in
        general, or - when the last three bases are identical - the first one
        whose base differs from them.

        :param one_list: One binary list.
                          Type: One-dimensional list(int).

        :return dna_sequence: One DNA sequence.
                              Type: List(char).
        """
        dna_sequence = []

        if 3 in self.mapping_rule:
            # unlimited mapping rule.
            if len(one_list) % 2 != 0:
                log.output(log.ERROR, str(__name__),
                           str(sys._getframe().f_code.co_name),
                           "Data length cannot be odd number!")
            for index in range(0, len(one_list), 2):
                pair_value = one_list[index] * 2 + one_list[index + 1]
                base_position = self.mapping_rule.index(pair_value)
                dna_sequence.append(inherent.index_base.get(base_position))
            return dna_sequence

        for bit in one_list:
            candidates = [
                position
                for position, value in enumerate(self.mapping_rule)
                if value == bit
            ]
            last_three = dna_sequence[-3:]
            is_homopolymer = len(last_three) == 3 and len(set(last_three)) == 1

            if not is_homopolymer:
                dna_sequence.append(
                    inherent.index_base.get(random.choice(candidates)))
                continue

            # Three identical bases in a row: take the first candidate base
            # that breaks the run (nothing is appended if none does).
            for position in candidates:
                base = inherent.index_base.get(position)
                if base != last_three[0]:
                    dna_sequence.append(base)
                    break

        return dna_sequence
Exemplo n.º 22
0
    def verify_for_matrix(self, verity_matrix):
        """
        introduction: Verify every row of the matrix and repair it as far as possible.

        :param verity_matrix: Matrix waiting for validation.
                              Type: Two-dimensional list(int).

        :return matrix: One self.verify_for_list(row, row_number) result per
                        input row, in row order.
                        Type: Two-dimensional list(int).
        """
        if self.need_log:
            log.output(
                log.NORMAL,
                str(__name__),
                str(sys._getframe().f_code.co_name),
                "Verify and repair the matrix.",
            )

        return [
            self.verify_for_list(current_row, row_number)
            for row_number, current_row in enumerate(verity_matrix)
        ]
Exemplo n.º 23
0
    def remove_for_matrix(self, verity_matrix):
        """
        introduction: Remove Hamming error correction from every row of a matrix.

        :param verity_matrix: Verifiable matrix.
                              The data of this matrix contains only 0 or 1 (non-char).
                              Type: Two-dimensional list(int).

        :return matrix: One self.remove_for_list(row) result per input row,
                        in row order.
                        Type: Two-dimensional list(int).
        """
        if self.need_log:
            log.output(
                log.NORMAL,
                str(__name__),
                str(sys._getframe().f_code.co_name),
                "Remove the error correction for matrix.",
            )

        return [self.remove_for_list(current_row) for current_row in verity_matrix]
Exemplo n.º 24
0
def decode(method=None, model_path=None, input_path=None, output_path=None,
           verify=None, has_index=True, need_log=False):
    """
    introduction: Use the selected method, convert DNA sequence set to the binary
                  file and output the binary file.

    The steps are: optionally load the method from a model file, read the DNA
    sequences, decode them, optionally split off and apply the row indexes,
    optionally verify and remove the error correction, and write the binary
    file.

    :param method: Method under folder "methods/".
                    If you have model file, you can use this function with out
                    method.
                    Type: Object.

    :param model_path: The path of a saved model file; when given, the method
                       is loaded from it and replaces the "method" argument.
                        Type: String

    :param input_path: The path of DNA sequence set you need to convert.
                       Type: String.

    :param output_path: The path of binary file consistent with previous
                        documents.
                         Type: String.

    :param verify: Error correction method under "methods/verifies/"
                    Type: Object.

    :param has_index: Declare whether the DNA sequences contain binary sequence
                      indexes.
                       Type: bool.

    :param need_log: Show the log.
    """

    if method is None and model_path is None:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The method you select does not exist!")
        return

    if input_path is None or len(input_path) == 0:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The input file path is not valid!")

    # Validate the output path itself; testing len(input_path) here let an
    # empty output path through.
    if output_path is None or len(output_path) == 0:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The output file path is not valid!")

    if model_path is not None:
        method = saver.load_model(model_path)

    dna_sequences = data_handle.read_dna_file(input_path, need_log)

    output_matrix, size = method.decode(dna_sequences, need_log)

    if has_index:
        indexes, data_set = index_operator.divide_all(output_matrix, need_log)
        output_matrix = index_operator.sort_order(indexes, data_set, need_log)

    if verify is not None:
        output_matrix = verify.verify_for_matrix(output_matrix, need_log)
        output_matrix = verify.remove_for_matrix(output_matrix, need_log)

    data_handle.write_all_from_binary(output_path, output_matrix, size, need_log)
Exemplo n.º 25
0
def read_binary_from_all(path, segment_length=120, need_log=False):
    """
    introduction: Read a file byte by byte and unpack it into a matrix of bits.

    :param path: File path.
                  Type: string

    :param segment_length: Number of bits stored in each row of the matrix (120 by default).
                            Type: int

    :param need_log: Output log messages and reading progress when True.

    :return matrix: Bits of the file, most significant bit of every byte first, segment_length bits per row.
                    Cells of the last row that are not reached by the file content stay 0.
                    Type: two-dimensional list(int)

    :return size: File size in bytes, as reported by os.path.getsize.
                   Type: int
    """

    progress = monitor.Monitor()
    try:
        with open(path, mode="rb") as file:
            if need_log:
                log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                           "Read binary matrix from file: " + path)

            size = os.path.getsize(path)

            # Enough zero-filled rows to hold all size * 8 bits.
            row_count = math.ceil(size * 8 / segment_length)
            matrix = [[0] * segment_length for _ in range(row_count)]

            # Running bit offset from the start of the file; it determines the target cell.
            position = 0
            for byte_index in range(size):
                if need_log:
                    progress.output(byte_index, size)

                value = struct.unpack("B", file.read(1))[0]
                # Emit the eight bits of this byte from the highest to the lowest.
                for shift in range(7, -1, -1):
                    row, col = divmod(position, segment_length)
                    matrix[row][col] = (value >> shift) & 1
                    position += 1

        # Number of binary digits needed to write the row count (bin() without its "0b" prefix).
        index_digits = len(bin(len(matrix))) - 2
        if index_digits * 7 > segment_length and need_log:
            log.output(log.WARN, str(__name__), str(sys._getframe().f_code.co_name),
                       "The proportion of index in whole sequence may be high. \n"
                       "It is recommended to increase the length of output DNA sequences "
                       "or to divide the file into more segment pools")

        return matrix, size
    except IOError:
        # The failure is only reported here; no value is returned explicitly on this path.
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The file selection operation was not performed correctly. Please execute the operation again!")
Exemplo n.º 26
0
    def _init_check(self):
        """
        introduction: Check the initialization parameters and log an error for each one that is out of range.

        The attributes inspected are "redundancy" (reported when below 0), "header_size" (reported when below 0)
        and "gc_content" (reported when below 0 or above 0.5).
        """
        # Resolved once here, in this method's own frame, and reused by every report below.
        module_name = str(__name__)
        method_name = str(sys._getframe().f_code.co_name)

        if self.redundancy < 0:
            log.output(log.ERROR, module_name, method_name,
                       "The parameter \"redundancy\" is wrong, it is greater than or equal to 0!")

        if self.header_size < 0:
            log.output(log.ERROR, module_name, method_name,
                       "The parameter \"header_size\" is wrong, it is greater than or equal to 0!")

        if self.gc_content < 0 or self.gc_content > 0.5:
            log.output(log.ERROR, module_name, method_name,
                       "The parameter \"gc_content\" is wrong, it is in the [0, 0.5]!")
Exemplo n.º 27
0
    def decode(self, dna_sequences, need_log=False):
        """
        introduction: Rebuild the binary matrix from a collection of DNA sequences (droplets).

        :param dna_sequences: The DNA sequences to decode; each one is handed to Droplet.init_binaries.

        :param need_log: Show the log.

        :return matrix: One entry per expected packet (self.decode_packets entries).
                         Entries that could not be recovered remain None.

        :return file_size: The value of self.file_size, returned unchanged.
        """
        self.monitor.restore()

        # Raise the interpreter recursion limit to the configured depth before resolving droplets.
        sys.setrecursionlimit(self.recursion_depth)

        module_name = str(__name__)
        method_name = str(sys._getframe().f_code.co_name)

        if self.decode_packets is None:
            log.output(
                log.ERROR, module_name, method_name,
                "We miss the parameter \"decode_packets\", please try again after inputting this parameter."
            )

        if need_log:
            log.output(
                log.WARN, module_name, method_name,
                "If we get the system crash named -1073741571(0xC00000FD), "
                "it is caused by the excessive function (_update_droplets) recursive calls.\n"
                "Please reduce the hyper-parameter \"redundancy\" or split the original digital file"
                " in the encoding process.")
            log.output(log.NORMAL, module_name, method_name,
                       "Decode the matrix by Fountain Codec.")

        # Generator object configured with the packet count and the delta / c_dist settings.
        self.prng = PRNG(K=self.decode_packets,
                         delta=self.delta,
                         c=self.c_dist)

        matrix = [None] * self.decode_packets
        solved = set()
        droplets_by_chunk = defaultdict(set)

        for sequence in dna_sequences:
            drop = Droplet()
            drop.init_binaries(self.prng, sequence, self.header_size)

            # Register the droplet under every chunk index it reports.
            for chunk_index in drop.chuck_indices:
                droplets_by_chunk[chunk_index].add(drop)

            self._update_droplets(drop, matrix, solved, droplets_by_chunk)

            if need_log:
                self.monitor.output(len(solved), self.decode_packets)

        # Report failure when a row is still unset or fewer segments than expected were completed.
        if None in matrix or len(solved) < self.decode_packets:
            log.output(log.ERROR, module_name, method_name,
                       "Couldn't decode the whole file.")

        self.monitor.restore()

        return matrix, self.file_size
Exemplo n.º 28
0
    def encode(self, matrix, size, need_log=False):
        """
        introduction: Produce DNA sequences (droplets) from a binary matrix.

        :param matrix: Binary two-dimensional matrix to encode.
                        An error is logged when the length of its first row is odd.

        :param size: File size; it is stored in self.file_size and handed back later by decode.
                      Type: int

        :param need_log: Show the log.

        :return dna_sequences: The accepted DNA sequences,
                               ceil(len(matrix) * (1 + self.redundancy)) of them.
                               Type: list.
        """
        self.file_size = size
        self.monitor.restore()

        module_name = str(__name__)
        method_name = str(sys._getframe().f_code.co_name)

        if len(matrix[0]) % 2 != 0:
            log.output(log.ERROR, module_name, method_name,
                       "Binary sequence length should be even.")

        if need_log:
            log.output(log.NORMAL, module_name, method_name,
                       "Encode the matrix by Fountain Codec.")

        # The decoder needs to know how many packets the original matrix holds.
        packet_count = len(matrix)
        self.decode_packets = packet_count

        dna_sequences = []
        final_count = math.ceil(packet_count * (1 + self.redundancy))

        # Seed source for the droplets.
        seed_stream = LFSR().lfsr_s_p()
        # Generator object configured with the packet count and the delta / c_dist settings.
        self.prng = PRNG(K=packet_count, delta=self.delta, c=self.c_dist)

        # NOTE(review): nothing in this method ever adds a seed to this mapping, so the
        # membership test below cannot succeed as written - confirm whether seeds should be recorded.
        seen_seeds = dict()
        while len(dna_sequences) < final_count:
            seed = next(seed_stream)
            if seed in seen_seeds:
                continue

            # Build a droplet for this seed and trans-code it to DNA.
            drop = Droplet()
            dna_sequence = drop.get_dna(seed, self.prng, matrix,
                                        self.header_size)

            # Keep the sequence only if it passes both validity screens.
            sequence_text = "".join(dna_sequence)
            if validity.homopolymer(sequence_text, self.homopolymer) \
                    and validity.cg_content(sequence_text, 0.5 + self.gc_content):
                dna_sequences.append(dna_sequence)

            if need_log:
                self.monitor.output(len(dna_sequences), final_count)

        if need_log:
            log.output(
                log.WARN, module_name, method_name,
                "Fountain codes for which the inputted matrix is of full rank in the decoding process are "
                "decodable, the full rank depends on the hyper-parameter \"redundancy\" in the Fountain Codec.\n"
                "Therefore, we strongly recommend that we decode it directly to verify the decodable "
                "of the DNA file before conducting DNA synthesis experiments.")

        self.monitor.restore()

        return dna_sequences
Exemplo n.º 29
0
    def verify_for_list(self, input_list, row=None):
        """
        introduction: Verify the correctness of the list and repair the error information to a certain extent.

        :param input_list: Verifiable list.
                            The data of this list contains only 0 or 1 (non-char).
                            Type: One-dimensional list(int).

        :param row: The row number of the matrix to which the list belongs.
                    It is only used in log messages; None means the list is verified on its own.

        :return output_list: List that has been verified and, where possible, repaired.
                             It is always converted back with __decimal_to_binary__ before being returned.
                             Type: One-dimensional list(int).
        """
        module_name = str(__name__)
        method_name = str(sys._getframe().f_code.co_name)

        if row is None and self.need_log:
            log.output(log.NORMAL, module_name, method_name,
                       "Verify and repair the list.")

        output_list = self.__binary_to_decimal__(input_list)

        # Negative symbols are treated as erasures: zero them and remember where they were.
        erasure_positions = []
        for index, symbol in enumerate(output_list):
            if symbol < 0:
                output_list[index] = 0
                erasure_positions.append(index)

        if len(erasure_positions) > self.check_size:
            if row is not None:
                message = "Row " + str(row) + " has too many erasures to correct!"
            else:
                message = "Too many erasures to correct!"
            log.output(log.ERROR, module_name, method_name, message)

        syndromes = [
            self.__galois_field_evaluate__(output_list, self.galois_field_exp[i])
            for i in range(self.check_size)
        ]
        # All-zero syndromes: nothing to repair.
        if max(syndromes) == 0:
            return self.__decimal_to_binary__(output_list)

        forney_syndromes = self.__forney_syndromes__(syndromes,
                                                     erasure_positions,
                                                     len(output_list))
        error_positions = self.__find_errors__(forney_syndromes,
                                               len(output_list), row)
        # NOTE(review): the original also tested "erasure_positions is None" here, which can never
        # hold because erasure_positions is always a list; that unreachable early return was dropped.
        # When no error positions are reported, only the known erasures are corrected.
        if error_positions is None:
            error_positions = []

        output_list = self.__correct_errata__(
            output_list, syndromes, erasure_positions + error_positions)

        # Evaluate the syndromes again on the repaired data; a non-zero value means the repair failed.
        remaining = [
            self.__galois_field_evaluate__(output_list, self.galois_field_exp[i])
            for i in range(self.check_size)
        ]
        if max(remaining) > 0:
            if row is not None:
                message = "Row " + str(row) + " could not be correct!"
            else:
                message = "Could not be correct!"
            log.output(log.WARN, module_name, method_name, message)

        return self.__decimal_to_binary__(output_list)
Exemplo n.º 30
0
    def verify_for_list(self, input_list, row=None):
        """
        introduction: Verify the correctness of the list and repair the error information to a certain extent.

        :param input_list: The binary list requiring validation.
                           It is read in reversed order, with parity bits at the positions 1, 2, 4, 8, ...
                           (counted from 1) of that reversed order. The list itself is never modified.
                           Type: One-dimensional list(int).

        :param row: The row number of the matrix to which the list belongs (only used in log messages).

        :return output_list: input_list itself when no error is detected;
                             a repaired copy when exactly one position is reported as wrong;
                             None when the reported position lies outside the list.
                             Type: One-dimensional list(int).
        """
        if row is None and self.need_log:
            log.output(
                log.NORMAL,
                str(__name__),
                str(sys._getframe().f_code.co_name),
                "Verify and repair the list.",
            )

        # Work on a reversed copy so that the caller's list is never left reversed.
        received = list(reversed(input_list))
        length = len(received)

        # Recompute every parity group; group k covers the positions (counted from 1) whose
        # k-th binary digit is set. The failing groups, read as a binary number, give the
        # position of a single wrong bit.
        error = 0
        weight = 1
        while weight <= length:
            parity = 0
            for start in range(weight - 1, length, 2 * weight):
                for bit in received[start:start + weight]:
                    parity ^= bit
            error += int(parity) * weight
            weight *= 2

        if error == 0:
            return input_list

        # Positions 1..length are all valid, so only a larger value is out of range.
        if error > length:
            log.output(
                log.WARN,
                str(__name__),
                str(sys._getframe().f_code.co_name),
                "Multiple errors can be detected, but due to the limitation of error-correction settings, the errors cannot be located.",
            )
            return None

        if self.need_log:
            if row is not None:
                log.output(
                    log.WARN,
                    str(__name__),
                    str(sys._getframe().f_code.co_name),
                    "Error is No. " + str(length - error) +
                    "bit, in " + str(row + 1) +
                    " of matrix, and it is repaired.",
                )
            else:
                log.output(
                    log.WARN,
                    str(__name__),
                    str(sys._getframe().f_code.co_name),
                    "Error is No. " + str(length - error) +
                    "bit, and it is repaired.",
                )

        # Flip the reported bit, then restore the caller's bit order.
        received[error - 1] = 1 if received[error - 1] == 0 else 0
        received.reverse()
        return received