def calculate(self):
    """
    Collect GC-content and homopolymer histograms for every (file, coding scheme) task.

    Every file in ``self.file_paths`` is read into bit segments once and then encoded
    with each entry of ``self.coding_schemes``. ``self.needed_indices`` is zipped with
    the coding schemes and selects, per scheme, whether the indexed or the plain bit
    segments are encoded.

    For the DNA sequences of each task two histograms are built:

    * GC histogram: 101 buckets; bucket ``p`` counts the sequences whose GC content,
      rounded half up, is ``p`` percent.
    * Homopolymer histogram: bucket ``k`` counts the sequences whose longest run of a
      single nucleotide (A, C, G or T) has length ``k``. Sequences without any of these
      four nucleotides are not counted.

    Both histograms are stored as strings (``", "`` replaced by ``"-"``) under
    ``self.records["results"]["task <n>"]``, where ``n`` is the zero-based task number.

    :return: None; the outcome is written to ``self.records["results"]``.
    """
    from itertools import groupby

    results = {}
    task_index = 0
    total_task = len(self.coding_schemes) * len(self.file_paths)

    for file_name, file_path in self.file_paths.items():
        original_bit_segments, bit_size = data_handle.read_bits_from_file(
            file_path, self.segment_length, self.need_logs)
        bit_segments_with_indices, _index_length = indexer.connect_all(
            original_bit_segments, self.index_length, self.need_logs)

        for (scheme_name, coding_scheme), needed_index in zip(self.coding_schemes.items(), self.needed_indices):
            # NOTE(review): logging of the scheme is forced on regardless of
            # self.need_logs -- confirm this is intended.
            coding_scheme.need_logs = True

            if self.need_logs:
                print(">" * 50)
                print("*" * 50)
                print("Run task (" + str(task_index + 1) + "/" + str(total_task) + ").")
                print("*" * 50)

            bit_segments = bit_segments_with_indices if needed_index else original_bit_segments
            dna_sequences = coding_scheme.silicon_to_carbon(bit_segments, bit_size)["dna"]

            gc_distribution = [0] * 101
            # default=0 keeps an empty encoding result from raising in max().
            homo_distribution = [0] * max(map(len, dna_sequences), default=0)

            for dna_sequence in dna_sequences:
                dna_segment = "".join(dna_sequence)

                gc_count = dna_segment.count("C") + dna_segment.count("G")
                # Adding 0.5 before truncation rounds the percentage half up.
                gc_content = int((gc_count / len(dna_segment) * 100) + 0.5)
                gc_distribution[gc_content] += 1

                # Longest run of one nucleotide, found in a single pass instead of
                # probing every candidate length with substring searches.
                longest_run = max(
                    (sum(1 for _ in run) for base, run in groupby(dna_segment) if base in "ACGT"),
                    default=0,
                )
                # The original search never looked beyond len(dna_sequence).
                longest_run = min(longest_run, len(dna_sequence))

                if longest_run:
                    # A sequence that is one single homopolymer of maximal length needs
                    # bucket index == max length, one past the initial size. Grow only
                    # then, so the output is unchanged for all other inputs.
                    if longest_run >= len(homo_distribution):
                        homo_distribution.extend([0] * (longest_run + 1 - len(homo_distribution)))
                    homo_distribution[longest_run] += 1

            if self.need_logs:
                print(">" * 50)
                print()

            # NOTE(review): the last key is kept exactly as found in the source; it looks
            # like a masked spelling -- confirm against the consumers of these records.
            results["task " + str(task_index)] = {
                "coding scheme": scheme_name,
                "file": file_name,
                "gc": str(gc_distribution).replace(", ", "-"),
                "h**o": str(homo_distribution).replace(", ", "-")
            }
            task_index += 1

    self.records["results"] = results
def transcode(self, **info):
    """
    Run one transcoding pass in the direction given by ``info["direction"]``.

    ``"t_c"`` turns digital data into DNA sequences, ``"t_s"`` turns DNA sequences
    back into digital data. Statistics of the run are written to ``self.records``.

    Recognised keyword arguments:

    * ``direction``: ``"t_c"`` or ``"t_s"`` (required).
    * ``segment_length``: length used when reading bits, 120 if absent (``"t_c"`` only).
    * ``input_path`` / ``input_string``: data source; ``input_path`` wins if both are given.
    * ``index``: truthy to attach indices (``"t_c"``) or to strip and sort by them (``"t_s"``).
    * ``output_path``: where to write the result.
    * ``output_string``: in ``"t_s"``, when no ``output_path`` is given, render the bits as a
      string, which is printed if ``self.need_logs`` is set.

    :return: dict with the keys ``"bit"`` and ``"dna"``; in ``"t_s"`` the ``"bit"`` entry is
             ``None`` when no bit segments could be recovered.
    :raise ValueError: if ``direction`` is missing or unknown, or if no input is supplied.
    """
    mode = info.get("direction")
    records = self.records
    verbose = self.need_logs
    use_index = info.get("index")

    if mode == "t_c":
        segment_length = info.get("segment_length", 120)
        records["payload length"] = segment_length

        if "input_path" in info:
            bit_segments, bit_size = data_handle.read_bits_from_file(
                info["input_path"], segment_length, verbose)
        elif "input_string" in info:
            bit_segments, bit_size = data_handle.read_bits_from_str(
                info["input_string"], segment_length, verbose)
        else:
            raise ValueError("There is no digital data input here!")

        # Keep the payload as read, before indices / error correction are attached.
        source_bits = copy.deepcopy(bit_segments)

        index_length = 0
        if use_index:
            bit_segments, index_length = indexer.connect_all(bit_segments, verbose)
        records["index length"] = index_length

        correction_length = 0
        if self.error_correction is not None:
            bit_segments, correction_length = self.error_correction.insert(bit_segments)
        records["error-correction length"] = correction_length

        encoded = self.coding_scheme.silicon_to_carbon(bit_segments, bit_size)
        dna_sequences = encoded["dna"]
        records["information density"] = round(encoded["i"], 3)
        records["encoding runtime"] = round(encoded["t"], 3)

        if "output_path" in info:
            data_handle.write_dna_file(info["output_path"], dna_sequences, verbose)

        return {"bit": source_bits, "dna": dna_sequences}

    if mode == "t_s":
        if "input_path" in info:
            dna_sequences = data_handle.read_dna_file(info["input_path"], verbose)
        elif "input_string" in info:
            dna_sequences = list(info["input_string"])
        else:
            raise ValueError("There is no digital data input here!")

        # Returned to the caller untouched, whatever the decoder does with its input.
        source_dna = copy.deepcopy(dna_sequences)

        decoded = self.coding_scheme.carbon_to_silicon(dna_sequences)
        records["decoding runtime"] = round(decoded["t"], 3)
        bit_segments = decoded["bit"]
        bit_size = decoded["s"]

        if not bit_segments:
            records["error rate"] = "100.00%"
            return {"bit": None, "dna": source_dna}

        if self.error_correction is None:
            records["error rate"] = None
            records["error indices"] = None
            records["error bit segments"] = None
        else:
            verified = self.error_correction.remove(bit_segments)
            bit_segments = verified["bit"]
            records["error rate"] = str(round(verified["e_r"] * 100, 2)) + "%"
            if verified["e_i"] != []:
                records["error indices"] = str(verified["e_i"]).replace(", ", "-")
            else:
                records["error indices"] = None
            if verified["e_bit"] != []:
                records["error bit segments"] = str(verified["e_bit"]).replace(", ", "-")
            else:
                records["error bit segments"] = None

        # Verification may have discarded every segment.
        if not bit_segments:
            return {"bit": None, "dna": source_dna}

        if use_index:
            indices, bit_segments = indexer.divide_all(bit_segments, verbose)
            bit_segments = indexer.sort_order(indices, bit_segments, verbose)

        if "output_path" in info:
            data_handle.write_bits_to_file(info["output_path"], bit_segments, bit_size, verbose)
        elif "output_string" in info:
            rendered = data_handle.write_bits_to_str(bit_segments, bit_size, verbose)
            if verbose:
                print(rendered)

        return {"bit": bit_segments, "dna": source_dna}

    # Missing and unrecognised directions are reported identically.
    raise ValueError("Unknown parameter \"direction\", please use \"t_c\" or \"t_s\".")
def test_add_indices(self):
    """Check that connect_all turns a copy of the original matrix into the expected indexed matrix."""
    # Work on a deep copy so the shared fixture is never handed to connect_all directly.
    source_matrix = copy.deepcopy(self.test_o_matrix)
    indexed_matrix, _second_value = connect_all(source_matrix)
    self.assertEqual(indexed_matrix, self.test_i_matrix)