예제 #1
0
def write_dna_file(path, dna_sequences, need_log=False):
    """
    introduction: Save a set of DNA sequences to a text file, one sequence per line.

    :param path: Location of the file to write (opened in text mode "w").
                  Type: string

    :param dna_sequences: Sequences to save; every entry is joined into a single line.
                          Type: one-dimensional list(string)

    :param need_log: whether to emit a log message and per-row progress output.

    :return dna_sequences: The sequences that were passed in, when writing succeeds.
                           If an IOError occurs, an error is logged and nothing is
                           returned explicitly.
    """

    progress = monitor.Monitor()

    try:
        with open(path, "w") as handle:
            if need_log:
                log.output(log.NORMAL, str(__name__),
                           str(sys._getframe().f_code.co_name),
                           "Write DNA sequences to file: " + path)

            total = len(dna_sequences)
            for number, sequence in enumerate(dna_sequences, start=1):
                if need_log:
                    progress.output(number, total)
                handle.write("".join(sequence) + "\n")

        return dna_sequences
    except IOError:
        log.output(
            log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
            "The file selection operation was not performed correctly. Please execute the operation again!"
        )
예제 #2
0
def get_yyc_rules(need_log=False):
    """
    introduction: Enumerate all candidate Yin-Yang rules and keep the accepted ones.

    For each base in A, T, C, G, every 4-bit pattern (rule 1) is paired with every
    16-bit pattern (rule 2, reshaped into a 4 x 4 matrix). Pairs accepted by "_check"
    are wrapped in a YYCRule together with the base and a running number that starts
    at 0.

    :param need_log: whether to emit a log message and progress output.

    :return rules: Accepted rules in enumeration order.
                   Type: one-dimensional list(YYCRule)
    """
    first_patterns = ["".join(bits) for bits in itertools.product("01", repeat=4)]
    second_patterns = ["".join(bits) for bits in itertools.product("01", repeat=16)]

    progress = monitor.Monitor()

    if need_log:
        # noinspection PyProtectedMember
        log.output(log.NORMAL, str(__name__),
                   str(sys._getframe().f_code.co_name),
                   "Find all the available Yin-Yang rules.")

    accepted = []
    visited = 0
    for base in ["A", "T", "C", "G"]:
        for first_pattern in first_patterns:
            for second_pattern in second_patterns:
                # Fresh lists for every combination, so no two rules share a list.
                rule1 = [int(bit) for bit in first_pattern]
                flat_rule2 = [int(bit) for bit in second_pattern]
                rule2 = numpy.array(flat_rule2).reshape(4, 4).tolist()

                if _check(rule1, rule2):
                    # The running number equals how many rules were accepted so far.
                    accepted.append(YYCRule(rule1, rule2, base, len(accepted)))

                visited += 1

                if need_log:
                    progress.output(
                        visited,
                        len(first_patterns) * len(second_patterns) * 4)

    return accepted
예제 #3
0
def divide_all(matrix, need_log=False):
    """
    introduction: Split every row of a binary matrix into an index part and a data part.

    Each row is handed to "divide" together with the index width, which is the number
    of binary digits needed to write len(matrix).

    :param matrix: Rows to be split.
                   Type: Two-dimensional list(int).

    :param need_log: whether to emit a log message and progress output.

    :returns indexs, datas: For every row, the first and the second value returned by
                            "divide", collected in row order.
    """
    progress = monitor.Monitor()
    row_count = len(matrix)
    # bin() yields "0b..." - dropping that two-character prefix leaves the digit count.
    index_binary_length = len(bin(row_count)) - 2

    if need_log:
        log.output(log.NORMAL, str(__name__),
                   str(sys._getframe().f_code.co_name),
                   "Divide index and data from binary matrix.")

    indexs, datas = [], []
    for number, segment in enumerate(matrix, start=1):
        if need_log:
            progress.output(number, row_count)
        index, data = divide(segment, index_binary_length)
        indexs.append(index)
        datas.append(data)

    progress.restore()

    return indexs, datas
예제 #4
0
def connect_all(matrix, need_log=False):
    """
    introduction: Combine every row of a two-dimensional matrix with its row number.

    Each row is passed to "connect" together with its 0-based position and the index
    width, which is the number of binary digits needed to write len(matrix).

    :param matrix: Data from input.
                   Type: Two-dimensional list(int).

    :param need_log: whether to emit a log message and progress output.

    :return new_matrix: The results of "connect", one per input row, in row order.
    """
    progress = monitor.Monitor()
    row_count = len(matrix)
    # bin() yields "0b..." - dropping that two-character prefix leaves the digit count.
    index_binary_length = len(bin(row_count)) - 2

    if need_log:
        log.output(log.NORMAL, str(__name__),
                   str(sys._getframe().f_code.co_name),
                   "Add index in the binary matrix.")

    new_matrix = []
    for position, segment in enumerate(matrix):
        if need_log:
            progress.output(position + 1, row_count)
        new_matrix.append(connect(position, segment, index_binary_length))

    progress.restore()

    return new_matrix
예제 #5
0
    def _sample_init(self, graph):
        '''
        Prepare the sampler state for a new run that starts from ``graph``.

        Steps, in this order:

        - create a fresh monitor object from ``self.monitor``
        - reset the backtrack counter to ``self.maxbacktrack`` and clear ``last_graphman``
        - run the graph through ``self.preprocessor.transform`` and keep the first result
        - when ``max_core_size_diff`` is greater than -1, remember the size of the
          transformed graph's base graph as ``seed_size``
        - score the transformed graph with ``self._score``
        - clear the sample notes and the set of scores seen on the sample path

        Returns the transformed graph object.
        '''
        self.monitorobject = monitor.Monitor(self.monitor)

        self.backtrack = self.maxbacktrack
        self.last_graphman = None

        transformed = self.preprocessor.transform([graph])
        graphman = transformed[0]
        base = graphman.base_graph()
        if self.max_core_size_diff > -1:
            self.seed_size = len(base)

        self._score(graphman)
        self._sample_notes = ''
        self._sample_path_score_set = set()

        # Registering the seed's score up front is meant to keep the seed itself
        # from ever appearing in the output (it could otherwise, if nothing
        # changes during sampling). The comparison is deliberately "== False".
        if self.include_seed == False:
            self._sample_path_score_set.add(graphman._score)

        return graphman
예제 #6
0
def sort_order(indexes, data_set, need_log=False):
    """
    introduction: Restore data in order of index.

    Rows are returned for the indexes 0, 1, 2, ... in ascending order. When the largest
    index is greater than the number of indexes, only the leading gap-free run of
    indexes is treated as required data; all other entries are regarded as additional
    bit segments and are left out of the result.

    :param indexes: The indexes of data set.
                    Type: One-dimensional list(int).

    :param data_set: The disordered data set, the locations of this are corresponding to
                     parameter "indexes".

    :param need_log: need output log. The progress monitor is only created when this is set.

    :returns matrix: Rows of data_set in correct order. If an index occurs more than
                     once, the row at its first occurrence is used.
                      Type: Two-dimensional list(int).

    :raises ValueError: if "indexes" is empty or a required index does not occur in it.
    """
    progress = monitor.Monitor() if need_log else None

    if need_log:
        log.output(log.NORMAL, str(__name__),
                   str(sys._getframe().f_code.co_name),
                   "Restore data order according to index.")

    # Position of the first occurrence of every index. Built once so that the
    # membership tests and lookups below are constant time instead of a list scan each.
    first_position = {}
    for position, value in enumerate(indexes):
        first_position.setdefault(value, position)

    # additional information checker
    flag_index = 0
    if max(indexes) > len(indexes):
        # Walk along the consecutive indexes 1, 2, 3, ... until the first gap.
        while flag_index + 1 in first_position:
            flag_index += 1
        # index to length
        flag_index += 1

    if need_log and flag_index > 0:
        log.output(
            log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
            "There are " + str(flag_index) + " required bit segments and " +
            str(len(indexes) - flag_index) + " additional bit segments")

    row_count = flag_index if flag_index > 0 else len(indexes)

    matrix = []
    for index in range(row_count):
        if index not in first_position:
            # Same exception type that list.index() raised here before.
            raise ValueError(str(index) + " is not in list")
        matrix.append(data_set[first_position[index]])
        if need_log:
            progress.output(index + 1, row_count)

    if need_log:
        progress.restore()

    return matrix
예제 #7
0
def write_all_from_binary(path, matrix, size, need_log=False):
    """
    introduction: Writing binary matrix to document.

    The bits of all rows are read as one continuous stream (most significant bit first)
    and packed into bytes. At most "size" bytes are written; bits beyond that, as well
    as a trailing group of fewer than 8 bits, are discarded.

    :param path: File path.
                  Type: string

    :param matrix: A matrix in which each row represents a binary segment that will be used for DNA sequence generation.
                   The number of columns is taken from the first row.
                    Type: two-dimensional list(int)

    :param size: This refers to file size (in bytes), to reduce redundant bits when transferring DNA to binary files.
                  Type: int

    :param need_log: choose to output log file or not. The progress monitor is only
                     created when this is set.
    """
    progress = monitor.Monitor() if need_log else None

    try:
        with open(path, "wb+") as file:
            if need_log:
                log.output(log.NORMAL, str(__name__),
                           str(sys._getframe().f_code.co_name),
                           "Write file from binary matrix: " + path)

            row_count = len(matrix)
            width = len(matrix[0]) if row_count > 0 else 0

            # Change bit to byte (8 -> 1), and write a file as bytes
            remaining = size
            bit_count = 0
            byte_value = 0
            for row_number, row in enumerate(matrix, start=1):
                if need_log:
                    progress.output(row_number, row_count)

                # Byte budget used up: keep reporting progress, but ignore the
                # remaining (padding) bits instead of accumulating them.
                if remaining <= 0:
                    continue

                for col in range(width):
                    bit_count += 1
                    byte_value = byte_value * 2 + row[col]
                    if bit_count == 8:
                        file.write(struct.pack("B", int(byte_value)))
                        bit_count = 0
                        byte_value = 0
                        remaining -= 1
                        # Stop at exactly "size" bytes; the previous ">= 0" test
                        # allowed one byte too many.
                        if remaining == 0:
                            break
    except IOError:
        log.output(
            log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
            "The file selection operation was not performed correctly. Please execute the operation again!"
        )
예제 #8
0
def read_dna_file(path, need_log=False):
    """
    introduction: Reading DNA sequence set from documents.

    Every line of the file becomes one sequence. Only the line terminator is removed,
    so a final line without a trailing newline keeps all of its characters.

    :param path: File path.
                  Type: string

    :param need_log: need output log. The progress monitor is only created when this is set.

    :return dna_sequences: The sequences read from the file, one per line, each split
                           into single characters. If an IOError occurs, an error is
                           logged and nothing is returned explicitly.
                           Type: two-dimensional list(string)
    """

    progress = monitor.Monitor() if need_log else None

    dna_sequences = []

    try:
        with open(path, "r") as file:
            if need_log:
                log.output(log.NORMAL, str(__name__),
                           str(sys._getframe().f_code.co_name),
                           "Read DNA sequences from file: " + path)

            # Read current file by line
            lines = file.readlines()
            for number, line in enumerate(lines, start=1):
                if need_log:
                    progress.output(number, len(lines))
                # Remove the newline only if it is there; cutting the last character
                # unconditionally would drop a base from an unterminated final line.
                dna_sequences.append(list(line.rstrip("\n")))

        return dna_sequences
    except IOError:
        log.output(
            log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
            "The file selection operation was not performed correctly. Please execute the operation again!"
        )
예제 #9
0
def read_binary_from_all(path, segment_length=120, need_log=False):
    """
    introduction: Load a file as a matrix of bits with a fixed number of columns.

    The bytes of the file are expanded to bits (most significant bit first) and laid out
    row by row. The last row is padded with zeros when the bit count is not a multiple
    of the segment length.

    :param path: Location of the file to read (opened in binary mode).
                  Type: string

    :param segment_length: Number of bits per row, i.e. the binary segment length used
                           for DNA sequence generation (120 by default).

    :param need_log: whether to emit log messages and progress output.

    :return matrix, size: The bit matrix and the file size in bytes as reported by
                          os.path.getsize. If an IOError occurs, an error is logged and
                          nothing is returned explicitly.
                          Type: two-dimensional list(int), int
    """

    progress = monitor.Monitor()
    try:

        with open(path, mode="rb") as source:

            if need_log:
                log.output(log.NORMAL, str(__name__),
                           str(sys._getframe().f_code.co_name),
                           "Read binary matrix from file: " + path)

            size = os.path.getsize(path)

            # Zero-filled storage with enough rows to hold every bit of the file.
            segment_count = math.ceil(size * 8 / segment_length)
            matrix = [[0] * segment_length for _ in range(segment_count)]

            # Running bit position within the whole file; it fixes row and column.
            position = 0
            for byte_number in range(1, size + 1):
                if need_log:
                    progress.output(byte_number, size)
                # Read a file as bytes, one at a time
                (value,) = struct.unpack("B", source.read(1))
                for bit in format(value, "08b"):
                    row, col = divmod(position, segment_length)
                    matrix[row][col] = int(bit)
                    position += 1

        # Binary digits needed to write the number of rows.
        index_width = len(bin(len(matrix))) - 2
        if index_width * 7 > segment_length and need_log:
            log.output(
                log.WARN, str(__name__),
                str(sys._getframe().f_code.co_name),
                "The proportion of index in whole sequence may be high. \n"
                "It is recommended to increase the length of output DNA sequences "
                "or to divide the file into more segment pools")

        return matrix, size
    except IOError:
        log.output(
            log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
            "The file selection operation was not performed correctly. Please execute the operation again!"
        )
예제 #10
0
    def __init__(self,
                 base_reference=None,
                 current_code_matrix=None,
                 support_bases=None,
                 support_spacing=0,
                 max_ratio=0.8,
                 search_count=100,
                 max_homopolymer=math.inf,
                 max_content=1,
                 min_free_energy=None):
        """
        introduction: Set up a YYC instance from its rules and screening thresholds.

        :param base_reference: Mapping between the four bases and binary data (RULE 1).
                                Two bases must map to 1 and the other two to 0, so only
                                6 cases exist. Defaults to [0, 1, 0, 1].

        :param current_code_matrix: Conversion rule between base and bits, based on the support base
                                     and the current base (RULE 2).
                                     Rows are labelled by the support base, columns by the current base.
                                         A   T   C   G
                                     A   X1  Y1  X2  Y2
                                     T   X3  Y3  X4  Y4
                                     C   X5  Y5  X6  Y6
                                     G   X7  Y7  X8  Y8
                                     Make sure that Xn + Yn = 1 and Xn * Yn = 0, n is in [1, 8].

        :param support_bases: Bases placed in front of the actual data.
                               There must be more support bases than the support spacing.
                               Defaults to a list holding index_base[0].

        :param support_spacing: Distance between the support base and the current base.
                                 It is 0 when the support base directly precedes the current base.

        :param max_ratio: Largest tolerated share of 0s or 1s.
                           A binary sequence with (count/length) >= this value is considered not good.

        :param search_count: Stored as self.search_count; its use is not visible in this method.

        :param max_homopolymer: maximum length of homopolymer.

        :param max_content: maximum content of C and G, which means GC content is in [1 - max_content, max_content].

        :param min_free_energy: required minimum free energy; the free energy of a DNA sequence
                                 should be lower than it.
        """

        # Store the inputs, falling back to the default Rules 1 and 2 (RULE 495)
        # and to the default support base whenever an argument is empty or missing.
        self.base_reference = base_reference or [0, 1, 0, 1]
        self.current_code_matrix = current_code_matrix or [
            [1, 1, 0, 0],
            [1, 0, 0, 1],
            [1, 1, 0, 0],
            [1, 1, 0, 0],
        ]
        self.support_bases = support_bases or [index_base[0]]
        self.support_spacing = support_spacing
        self.max_ratio = max_ratio
        self.search_count = search_count

        # Screening thresholds
        self.max_homopolymer = max_homopolymer
        self.max_content = max_content
        self.min_free_energy = min_free_energy

        # Validate the parameters before the remaining state is created.
        self._init_check()

        self.file_size = 0
        self.monitor = monitor.Monitor()