def create_fasta_from_fastq(fastq_filename: str):
    out_lines = []
    rules = FastDNARules()
    with open(fastq_filename, "r") as in_file:
        # every 4th line, starting with the first sequence line of the FASTQ file
        lines = in_file.readlines()[1:][::4]
    for line in lines:
        # [18:182] keeps a 164 nt window of each read
        err = rules.apply_all_rules(line[18:182])
        if err < 1.0:
            out_lines.append(line[18:182])
    with open("R:/out.fasta", "w") as out_file:
        for line in out_lines:
            out_file.write(">todo\n")
            out_file.write(line + "\n")
    return lines

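# Hedged usage sketch (not from the original code): the input path is a placeholder,
# and the output path "R:/out.fasta" is hard-coded inside the function, so this only
# runs on a system where such a drive/path exists.
if __name__ == "__main__":
    reads = create_fasta_from_fastq("sequenced_reads.fastq")
    print("parsed %i reads" % len(reads))
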
def main(file, number_of_chunks: int = 0, chunk_size: int = 0, error_correction: typing.Callable = nocode,
         as_dna: bool = False, insert_header: bool = False, save_number_of_chunks=False):
    if chunk_size != 0:
        number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(file, chunk_size)
    if as_dna:
        rules = FastDNARules()
    else:
        rules = None
    dist = ErlichZielinskiRobustSolitonDistribution(number_of_chunks, seed=2)
    encoder = LTEncoder(file, number_of_chunks, dist, insert_header=insert_header, rules=rules,
                        error_correction=error_correction, number_of_chunks_len_format="H",
                        id_len_format="I", used_packets_len_format="H",
                        save_number_of_chunks_in_packet=save_number_of_chunks, implicit_mode=False)
    encoder.encode_to_packets()
    print("Number of Chunks=%s" % encoder.number_of_chunks)
    encoder.save_packets(split_to_multiple_files=True, save_as_dna=as_dna)

def encode(p_output, file, as_dna=True, error_correction=nocode, insert_header=False,
           save_number_of_chunks_in_packet=False, overhead=6.0, clear_output=False):
    number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(file, CHUNK_SIZE)
    dist = RaptorDistribution(number_of_chunks)
    dna_rules = FastDNARules()
    if as_dna:
        rules = dna_rules
    else:
        rules = None
    x = RU10Encoder(file, number_of_chunks, dist, chunk_size=CHUNK_SIZE, insert_header=insert_header,
                    rules=rules, error_correction=error_correction, id_len_format="H",
                    number_of_chunks_len_format="B",
                    save_number_of_chunks_in_packet=save_number_of_chunks_in_packet)
    x.set_overhead_limit(overhead)
    x.encode_to_packets()
    p_output.send([ParallelPacket.from_packet(packet) for packet in x.encodedPackets])
    p_output.send("DONE")
    p_output.close()
    return 0

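# Hedged sketch of driving the pipe-based encode() above from a parent process; the file
# name is a placeholder. It mirrors the protocol visible in encode(): one or more lists
# of ParallelPackets are sent through the pipe, followed by the sentinel "DONE".
import multiprocessing


def collect_packets(file):
    recv_end, send_end = multiprocessing.Pipe(duplex=False)
    proc = multiprocessing.Process(target=encode, args=(send_end, file))
    proc.start()
    packets = []
    while True:
        msg = recv_end.recv()
        if msg == "DONE":
            break
        packets.extend(msg)  # msg is a list of ParallelPackets
    proc.join()
    return packets
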
def main(file: str, error_correction: typing.Callable[[typing.Any], typing.Any], asdna: bool = True,
         epsilon: float = 0.06, insert_header: bool = False):
    dist = OnlineDistribution(epsilon)
    number_of_chunks = dist.get_size()
    quality = 7
    if asdna:
        rules = FastDNARules()
    else:
        rules = None
    encoder = OnlineEncoder(file, number_of_chunks, dist, epsilon, quality,
                            error_correction=error_correction, quality_len_format="B",
                            insert_header=insert_header, check_block_number_len_format="H",
                            number_of_chunks_len_format="H", rules=rules,
                            save_number_of_chunks_in_packet=False)
    encoder.set_overhead_limit(1.70)
    encoder.encode_file(split_to_multiple_files=True, save_as_dna=asdna)
    encoder.save_packets(True, save_as_dna=asdna)

def encode(file, asdna=True, error_correction=nocode, insert_header=False,
           save_number_of_chunks_in_packet=False, save_as_fasta=True, save_as_zip=True,
           overhead=0.40, epsilon=0.068, quality=7, upper_bound=1.0):
    dist = OnlineDistribution(epsilon)
    number_of_chunks = dist.get_size()
    dna_rules = FastDNARules()
    if asdna:
        rules = dna_rules
    else:
        rules = None
    encoder = OnlineEncoder(file, number_of_chunks, dist, epsilon, quality,
                            error_correction=error_correction, quality_len_format="B",
                            insert_header=insert_header, check_block_number_len_format="H",
                            number_of_chunks_len_format="H", rules=rules,
                            save_number_of_chunks_in_packet=save_number_of_chunks_in_packet,
                            drop_upper_bound=upper_bound)  # , pseudo_decoder=pseudo)
    encoder.set_overhead_limit(overhead)
    # encoder.encode_file(split_to_multiple_files=True, save_as_dna=asdna)
    encoder.encode_to_packets()
    if save_as_fasta:
        encoder.save_packets_fasta(file_ending="_Online", seed_is_filename=True)
    elif save_as_zip:
        encoder.save_packets_zip(save_as_dna=True, file_ending="_Online", seed_is_filename=True)
    else:
        encoder.save_packets(True, save_as_dna=True, seed_is_filename=True, clear_output=True)
    encoder.save_packets(split_to_multiple_files=True, save_as_dna=True)
    print("Number of Chunks=%s" % encoder.number_of_chunks)
    return encoder

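# Hedged usage sketch: encode a placeholder file with the defaults above and write the
# packets as FASTA; the file name is illustrative only.
if __name__ == "__main__":
    encoder = encode("input.bin", asdna=True, save_as_fasta=True)
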
def get_error_sum(file, number_of_chunks, chunk_size, seq_seed=None, while_count=1000):
    max_seed = np.power(2, 8 * struct.calcsize(SEED_LEN_FORMAT))
    dist = RaptorDistribution(number_of_chunks)
    dna_rules = FastDNARules()
    error_correction = lambda x: reed_solomon_encode(x, NO_REPAIR_SYMBOLS)
    encoder = RU10Encoder(file, number_of_chunks, dist, chunk_size=chunk_size,
                          insert_header=INSERT_HEADER, rules=dna_rules,
                          error_correction=error_correction, id_len_format=SEED_LEN_FORMAT,
                          number_of_chunks_len_format=NUMBER_OF_CHUNKS_LEN_FORMAT,
                          save_number_of_chunks_in_packet=save_number_of_chunks_in_packet,
                          prepend="", append="")
    encoder.prepare()
    i = 0
    res = []
    while i < while_count:
        if seq_seed is not None:
            if seq_seed + i >= max_seed:
                break
            packet = encoder.create_new_packet(seed=seq_seed + i)
        else:
            packet = encoder.create_new_packet()
        should_drop_packet(dna_rules, packet)
        res.append(packet.error_prob)
        i += 1
    return res

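# Hedged sketch: get_error_sum() yields one error probability per generated packet, so the
# mean and the share of packets the rules would certainly drop (error_prob >= 1.0) can be
# summarized as below; file name and parameters are placeholders.
if __name__ == "__main__":
    probs = get_error_sum("logo.jpg", number_of_chunks=300, chunk_size=0, while_count=1000)
    dropped = sum(1 for p in probs if p >= 1.0)
    print("mean error_prob: %f, dropped: %i/%i" % (sum(probs) / len(probs), dropped, len(probs)))
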
def encode(file, asdna=True, chunk_size=DEFAULT_CHUNK_SIZE, error_correction=nocode,
           insert_header=False, save_number_of_chunks_in_packet=False, mode_1_bmp=False,
           prepend="", append="", upper_bound=0.5, save_as_fasta=True, save_as_zip=True,
           overhead=0.40):
    number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(file, chunk_size)
    dist = RaptorDistribution(number_of_chunks)
    if asdna:
        rules = FastDNARules()
    else:
        rules = None
    x = RU10Encoder(file, number_of_chunks, dist, insert_header=insert_header, pseudo_decoder=None,
                    chunk_size=0, rules=rules, error_correction=error_correction,
                    packet_len_format=PACKET_LEN_FORMAT, crc_len_format=CRC_LEN_FORMAT,
                    number_of_chunks_len_format=NUMBER_OF_CHUNKS_LEN_FORMAT,
                    id_len_format=ID_LEN_FORMAT,
                    save_number_of_chunks_in_packet=save_number_of_chunks_in_packet,
                    mode_1_bmp=mode_1_bmp, prepend=prepend, append=append,
                    drop_upper_bound=upper_bound)
    x.set_overhead_limit(overhead)
    x.encode_to_packets()
    if save_as_fasta and asdna:
        x.save_packets_fasta(file_ending="_RU10", seed_is_filename=True)
    elif save_as_zip:
        x.save_packets_zip(save_as_dna=asdna, file_ending="_RU10", seed_is_filename=True)
    else:
        x.save_packets(True, save_as_dna=asdna, seed_is_filename=True, clear_output=True)
    return x

def fix_(in_file, out_file_str):
    correct = []
    rule = FastDNARules()
    with open(in_file, "r") as inf:
        lines = inf.readlines()
    for line in lines[1::2]:  # every second line holds a sequence
        line = line.strip()
        if len(line) != 164:
            continue
        err_prob = rule.apply_all_rules(line)
        if err_prob < 1.0:
            correct.append((line, err_prob))
    with open(out_file_str, "w") as out_file:
        for line, err_prob in correct:
            out_file.write(">%s\n" % err_prob)
            out_file.write(line.strip().replace("\n", "") + "\n")
    cleaned = find_dup_ids(out_file_str)
    with open(out_file_str, "w") as out_file:
        for line in cleaned:
            out_file.write(">abc\n")
            out_file.write(line.strip().replace("\n", "") + "\n")

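# Hedged usage sketch: filter raw reads down to rule-conform 164 nt sequences and
# deduplicate them via find_dup_ids(); both paths are placeholders.
if __name__ == "__main__":
    fix_("raw_sequencer_output.fasta", "cleaned.fasta")
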
def encode(file, dist_lst, asdna=True, chunk_size=50):
    """
    Encodes the file with an RU10 encoder using the given distribution list and feeds every packet
    into a pseudo decoder to measure how many packets decoding needs.
    :param file: File to encode.
    :param dist_lst: Distribution list to use for the RaptorDistribution.
    :param asdna: If True, applies the DNA rules to every packet.
    :param chunk_size: Chunk size to use.
    :return: Dict mapping packet degree to error probabilities and the measured overhead.
    """
    packets_needed = 0
    packets = dict()
    number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(file, chunk_size)
    dist = RaptorDistribution(number_of_chunks)
    dist.f = dist_lst
    d = [x for x in range(0, 41)]
    dist.d = d
    dna_rules = FastDNARules()
    if asdna:
        rules = dna_rules
    else:
        rules = None
    x = RU10Encoder(file, number_of_chunks, dist, chunk_size=chunk_size, insert_header=False,
                    rules=rules, error_correction=nocode, id_len_format="H",
                    number_of_chunks_len_format="B", save_number_of_chunks_in_packet=False,
                    mode_1_bmp=False)
    x.prepare()
    y = RU10Decoder.pseudo_decoder(x.number_of_chunks, False)
    if y.distribution is None:  # self.isPseudo and
        y.distribution = RaptorDistribution(x.number_of_chunks)
        y.distribution.f = dist_lst
        y.distribution.d = d
        y.number_of_chunks = x.number_of_chunks
        _, y.s, y.h = intermediate_symbols(x.number_of_chunks, y.distribution)
        y.createAuxBlocks()
    n = 0
    for p_tmp in range(45):
        packets[p_tmp] = list()
    while n < number_of_chunks * 50:
        pack = x.create_new_packet()
        if packets_needed == 0:
            y.input_new_packet(pack)
        should_drop_packet(dna_rules, pack)
        if pack.get_degree() not in packets:
            packets[pack.get_degree()] = list()
        packets[pack.get_degree()].append(pack.error_prob)
        n += 1
        if n >= number_of_chunks and y.is_decoded() and packets_needed == 0:
            packets_needed = n
            # we don't want to break, we want to generate #chunks * XXX packets!
            # break
    print("Packets created: " + str(sum([len(x) for x in packets.values()])))
    return packets, (packets_needed - number_of_chunks) / 100.0

def create_packets_e_prob(start_num: int, normed_dist: ndarray, number_of_packets: int, rules=None):
    dist_obj = RaptorDistribution(__NUM_CHUNKS)
    dist_obj.f = normed_dist
    dist_obj.d = [x for x in range(0, 41)]
    encoder = RU10Encoder(file=__FILE, number_of_chunks=__NUM_CHUNKS, distribution=dist_obj,
                          insert_header=False)
    encoder.prepare()
    if rules is None:
        rules = FastDNARules()
    packets_e_prob = []
    for i in range(start_num, start_num + number_of_packets):
        packet = encoder.create_new_packet(seed=i)
        should_drop_packet(rules, packet)
        packets_e_prob.append(packet.error_prob)
        del packet
    del encoder
    return packets_e_prob

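# Hedged sketch: since create_packets_e_prob() works on an explicit seed range, disjoint
# ranges can be evaluated independently (e.g. in a process pool) and concatenated; the
# step size below is an illustrative choice, not from the original code.
def e_prob_for_seed_space(normed_dist, total_packets, step=1000):
    probs = []
    for start in range(0, total_packets, step):
        probs.extend(create_packets_e_prob(start, normed_dist, min(step, total_packets - start)))
    return probs
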
def main(in_file: str, num_chunks=0, chunk_size=0, as_dna=True,
         err_correction: typing.Callable[[typing.Any], typing.Any] = nocode, insert_header=False,
         save_number_of_chunks_in_packet=False, mode_1_bmp=False):
    if chunk_size != 0:
        num_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(in_file, chunk_size)
        dist = RaptorDistribution(num_chunks)
    elif num_chunks != 0:
        dist = RaptorDistribution(num_chunks)
    else:
        print("Aborting. Please set either chunk_size or number_of_chunks!")
        return
    if as_dna:
        rules = FastDNARules()
    else:
        rules = None
    x = RU10Encoder(in_file, num_chunks, dist, chunk_size=chunk_size, insert_header=insert_header,
                    rules=rules, error_correction=err_correction, id_len_format="H",
                    number_of_chunks_len_format="B",
                    save_number_of_chunks_in_packet=save_number_of_chunks_in_packet,
                    mode_1_bmp=mode_1_bmp)
    x.encode_to_packets()
    x.save_packets(True, save_as_dna=as_dna, seed_is_filename=False)
    # e_correction and norepair_symbols are module-level settings in the original script
    # (e.g. parsed from the command line)
    conf = {'error_correction': e_correction, 'repair_symbols': norepair_symbols, 'asdna': as_dna,
            'number_of_splits': 0, 'find_minimum_mode': True, 'seq_seed': False}
    x.save_config_file(conf, section_name="RU10_" + in_file)

def encode(self, file, asdna=True, error_correction=nocode, insert_header=False,
           save_number_of_chunks_in_packet=False, mode_1_bmp=False, chunk_size=50):
    packets_needed = 0
    packets = dict()
    number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(file, chunk_size)
    dist = RaptorDistribution(number_of_chunks)
    dist.f = self.X
    dist.d = self.d
    dna_rules = FastDNARules()
    if asdna:
        rules = dna_rules
    else:
        rules = None
    x = RU10Encoder(file, number_of_chunks, dist, chunk_size=chunk_size, insert_header=insert_header,
                    rules=rules, error_correction=error_correction, id_len_format="H",
                    number_of_chunks_len_format="B",
                    save_number_of_chunks_in_packet=save_number_of_chunks_in_packet,
                    mode_1_bmp=mode_1_bmp)
    x.prepare()
    y = RU10Decoder.pseudo_decoder(x.number_of_chunks, False)
    if y.distribution is None:  # self.isPseudo and
        y.distribution = RaptorDistribution(x.number_of_chunks)
        y.distribution.f = self.X
        y.distribution.d = self.d
        y.number_of_chunks = x.number_of_chunks
        _, y.s, y.h = intermediate_symbols(x.number_of_chunks, y.distribution)
        y.createAuxBlocks()
    n = 0
    for p_tmp in range(45):
        packets[p_tmp] = list()
    while n < number_of_chunks * 50:
        pack = x.create_new_packet()
        if packets_needed == 0:
            y.input_new_packet(pack)
        should_drop_packet(dna_rules, pack)
        if pack.get_degree() not in packets:
            packets[pack.get_degree()] = list()
        packets[pack.get_degree()].append(pack.error_prob)
        n += 1
        if n >= number_of_chunks and y.is_decoded() and packets_needed == 0:
            packets_needed = n
            # we don't want to break, we want to generate #chunks * XXX packets!
            # break
    print("Packets created: " + str(sum([len(x) for x in packets.values()])))
    return packets, (packets_needed - number_of_chunks) / 100.0

def encode(file, chunk_size, dist, as_dna=True, repeats=15):
    """
    Encodes the file to packets until the pseudo decoder was able to decode it 'repeats' times with
    the given chunk size and the distribution list.
    :param file: File to encode.
    :param chunk_size: Chunk size to use.
    :param dist: The distribution to calculate the average error and overhead for.
    :param as_dna: If true, uses the DNA rules.
    :param repeats: Number of en-/decoding cycles.
    :return: Average overhead over all repeats and the degree -> error-probability dict.
    """
    degree_dict = {}
    overhead_lst = []
    number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(file, chunk_size,
                                                                             insert_header=False)
    distribution = RaptorDistribution(number_of_chunks)
    distribution.f = dist
    distribution.d = [x for x in range(0, 41)]
    if as_dna:
        rules = FastDNARules()
    else:
        rules = None
    encoder = RU10Encoder(file, number_of_chunks, distribution, insert_header=False, rules=rules,
                          error_correction=nocode, id_len_format="H",
                          number_of_chunks_len_format="B", save_number_of_chunks_in_packet=False,
                          mode_1_bmp=False)
    encoder.prepare()
    for _ in range(0, repeats):
        encoder.random_state = np.random.RandomState()
        # print("Master-Seed used: " + str(encoder.random_state.get_state()[1][0]))
        pseudo_decoder = create_pseudo_decoder(encoder.number_of_chunks, distribution)
        needed_packets = 0
        while pseudo_decoder.GEPP is None or not pseudo_decoder.is_decoded():
            needed_packets += 1
            packet = encoder.create_new_packet()
            pseudo_decoder.input_new_packet(packet)
            should_drop_packet(rules, packet)
            if packet.get_degree() not in degree_dict:
                degree_dict[packet.get_degree()] = list()
            degree_dict[packet.get_degree()].append(min(packet.error_prob, 1.0))
        overhead = (needed_packets - encoder.number_of_chunks) / 100.0
        overhead_lst.append(overhead)
    return sum(overhead_lst) / len(overhead_lst), degree_dict

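# Hedged sketch: encode() above returns the average overhead and a degree -> error
# probability dict, so two candidate distribution lists can be compared head-to-head;
# the argument order follows the signature above, the helper name is ours.
def compare_distributions(file, dist_a, dist_b, chunk_size=50):
    overhead_a, _ = encode(file, chunk_size, dist_a)
    overhead_b, _ = encode(file, chunk_size, dist_b)
    return overhead_a - overhead_b  # negative means dist_a needed less overhead
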
def test_suite(as_dna, decoder_instance):
    dir_path = os.getcwd()
    try:
        os.remove(dir_path + "/" + file)
    except FileNotFoundError:
        print("Not deleting, file did not exist")
    shutil.copyfile(dir_path + "/" + cmp_file, dir_path + "/" + file)
    print(as_dna)
    chunksize = 200
    number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(file, chunksize)
    dist = RaptorDistribution(number_of_chunks)
    pseudo_decoder = decoder_instance.pseudo_decoder(number_of_chunks=number_of_chunks)
    rules = FastDNARules() if as_dna else None
    encoder = RU10Encoder(file, number_of_chunks, dist, pseudo_decoder=pseudo_decoder, rules=rules,
                          id_len_format="H", number_of_chunks_len_format="H", insert_header=True)
    encoder.encode_to_packets()
    encoder.save_packets(split_to_multiple_files=True, save_as_dna=as_dna)
    assert (pseudo_decoder.is_decoded()
            and pseudo_decoder.getSolvedCount() == pseudo_decoder.number_of_chunks)
    assert os.path.exists(out_dir)
    decoder = decoder_instance(out_dir)
    decoder.decodeFolder(id_len_format="H", number_of_chunks_len_format="H")
    if isinstance(decoder, RU10BPDecoder):
        for pack in encoder.encodedPackets:
            decoder.input_new_packet(pack)
    assert decoder.is_decoded() and decoder.getSolvedCount() == encoder.number_of_chunks
    os.remove(file)
    decoder.saveDecodedFile(print_to_output=False)
    assert os.path.exists(file) and filecmp.cmp(file, cmp_file)
    shutil.rmtree(out_dir)

# (excerpt: the enclosing loop that produces the `key`/`val` pairs and the
# `num_list`/`mean_list` values is not part of this snippet)
index.append(key)
data.append(val)

fig, ax = plt.subplots(ncols=1)
ax.boxplot(data)
ax.set_xticklabels(index)
plt.show()

plt.plot(num_list)
plt.plot(mean_list)
plt.show()

if __name__ == "__main__":
    file = "../.INFILES/Dorn"
    chunk_size = 100
    norepairsymbols = 6
    save_number_of_chunks_in_packet = False
    insert_header = False
    rules = FastDNARules()
    error_correction = lambda x: reed_solomon_encode(x, norepairsymbols)
    number_of_chunks = 50
    if chunk_size != 0:
        number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(file, chunk_size)
    dist = RaptorDistribution(number_of_chunks)
    x = RU10Encoder(file, number_of_chunks, dist, chunk_size=chunk_size,
                    insert_header=insert_header, rules=rules, error_correction=error_correction,
                    id_len_format="H", number_of_chunks_len_format="B",
                    save_number_of_chunks_in_packet=save_number_of_chunks_in_packet)
    aa = QualityPacketGen(x)

def test_homopolymers(params):
    assert fast_comp(DNARules.homopolymers(params[0]), 3) == fast_comp(
        FastDNARules.homopolymers(params[0])) == params[1]


def test_c_permutation(params):
    assert fast_comp(DNARules.c_permutation(params[0])) == fast_comp(
        FastDNARules.c_permutation(params[0])) == params[1]


def test_random_permutations(params):
    assert DNARules.random_permutations(params[0]) == FastDNARules.random_permutations(
        params[0]) == params[1]


def test_illegal_symbols(params):
    assert DNARules.illegal_symbols(params[0]) == FastDNARules.illegal_symbols(
        params[0]) == params[1]


def test_trinucleotid_runs(params):
    assert fast_comp(DNARules.trinucleotid_runs(params[0])) == fast_comp(
        FastDNARules.trinucleotid_runs(params[0])) == params[1]

def test_long_strands(params):
    assert abs(DNARules.long_strands(params) - FastDNARules.long_strands(params)) < 0.01

def test_repeat_region(params):
    assert fast_comp(DNARules.repeatRegion(params[0])) == fast_comp(
        FastDNARules.repeatRegion(params[0])) == params[1]


def test_small_repeat_region(params):
    assert fast_comp(DNARules.smallRepeatRegion(params[0])) == fast_comp(
        FastDNARules.smallRepeatRegion(params[0])) == params[1]


def test_gc_content(params):
    assert round(DNARules.overall_gc_content(params[0]), 2) == round(
        FastDNARules.overall_gc_content(params[0]), 2) == params[1]


def test_simple_motif_search(params):
    assert fast_comp(DNARules.simple_motif_search(params[0])) == fast_comp(
        FastDNARules.simple_motif_search(params[0])) == params[1]


def test_motif_regex_search(params):
    assert fast_comp(DNARules.motif_regex_search(params[0])) == fast_comp(
        FastDNARules.motif_regex_search(params[0])) == params[1]

def test_suite(as_dna, decoder_instance, distribution, use_header, implicit_mode):
    dir_path = os.getcwd()
    try:
        os.remove(dir_path + "/" + file)
    except FileNotFoundError:
        print("Not deleting, file did not exist")
    shutil.copyfile(dir_path + "/" + cmp_file, dir_path + "/" + file)
    chunksize = 200
    number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(file, chunksize)
    pseudo_decoder = decoder_instance.pseudo_decoder(number_of_chunks)
    if distribution == "robust":
        dist = RobustSolitonDistribution(S=number_of_chunks, delta=0.2, seed=2)
    elif distribution == "ideal":
        dist = IdealSolitonDistribution(S=number_of_chunks, seed=2)
    else:
        dist = ErlichZielinskiRobustSolitonDistribution(k=number_of_chunks, delta=0.2, seed=2)
    rules = FastDNARules() if as_dna else None
    encoder = LTEncoder(file, number_of_chunks, dist, chunk_size=chunksize,
                        pseudo_decoder=pseudo_decoder, rules=rules, insert_header=use_header,
                        number_of_chunks_len_format="H", id_len_format="H",
                        used_packets_len_format="H", implicit_mode=implicit_mode)
    encoder.encode_to_packets()
    encoder.save_packets(split_to_multiple_files=True, save_as_dna=as_dna)
    assert pseudo_decoder.is_decoded() and pseudo_decoder.getSolvedCount() == encoder.number_of_chunks
    assert os.path.exists(out_dir)
    decoder = decoder_instance(out_dir, use_headerchunk=use_header, dist=dist,
                               implicit_mode=implicit_mode)
    decoder.decodeFolder(number_of_chunks_len_format="H", seed_len_format="H",
                         degree_len_format="H")
    assert decoder.is_decoded() and decoder.getSolvedCount() == encoder.number_of_chunks
    os.remove(file)
    decoder.saveDecodedFile(print_to_output=False)
    if not use_header:
        out_file = "DEC_LT_" + file
    else:
        out_file = file
    assert os.path.exists(out_file) and filecmp.cmp(out_file, cmp_file)
    if decoder_instance == LTBPDecoder:
        # since the ApproxDecoder defines an upper bound, the Gauss decoder MUST be able to decode!
        decoder = LTDecoder(out_dir, use_headerchunk=use_header, dist=dist,
                            implicit_mode=implicit_mode)
        decoder.decodeFolder(number_of_chunks_len_format="H", seed_len_format="H",
                             degree_len_format="H")
        assert (decoder.is_decoded() and decoder.getSolvedCount() == encoder.number_of_chunks)
        os.remove(out_file)
        decoder.saveDecodedFile(print_to_output=False)
        assert os.path.exists(out_file) and filecmp.cmp(out_file, cmp_file)
    shutil.rmtree(out_dir)

def run(seq_seed=None, file='logo.jpg', repair_symbols=2, insert_header=False,
        error_correction=reed_solomon_encode, save_number_of_chunks_in_packet=False, l_size=1000,
        while_count=1000, chunk_size=0, number_of_chunks=300, prepend="", append="",
        seed_len_format=DEFAULT_ID_LEN_FORMAT,
        number_of_chunks_len_format=DEFAULT_NUMBER_OF_CHUNKS_LEN_FORMAT, method='RU10',
        mode1bmp=False, drop_above=0.4, packets_to_create=None):
    global counter
    if chunk_size != 0:
        number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(file, chunk_size)
    dna_rules = FastDNARules()
    if packets_to_create is None:
        packets_to_create = math.pow(2, 8 * struct.calcsize(seed_len_format))
    rules = dna_rules
    if repair_symbols != 0:
        dist, error_correction = get_err_dist(method, number_of_chunks, repair_symbols)
    else:
        dist = RaptorDistribution(number_of_chunks)
    if method == 'RU10':
        x = RU10Encoder(file, number_of_chunks, dist, chunk_size=chunk_size,
                        insert_header=insert_header, rules=rules,
                        error_correction=error_correction, id_len_format=seed_len_format,
                        number_of_chunks_len_format=number_of_chunks_len_format,
                        save_number_of_chunks_in_packet=save_number_of_chunks_in_packet,
                        mode_1_bmp=mode1bmp, prepend=prepend, append=append)
        x.prepare()
    elif method == 'LT':
        x = LTEncoder(file, number_of_chunks, dist, chunk_size=chunk_size,
                      insert_header=insert_header, rules=rules, error_correction=error_correction,
                      number_of_chunks_len_format=number_of_chunks_len_format,
                      id_len_format=seed_len_format,
                      save_number_of_chunks_in_packet=save_number_of_chunks_in_packet)
        x.prepareEncoder()
    elif method == 'Online':
        number_of_chunks = dist.get_size()
        x = OnlineEncoder(file, number_of_chunks, dist, ONLINE_EPS, ONLINE_QUALITY,
                          error_correction=error_correction, quality_len_format="B",
                          insert_header=False, check_block_number_len_format=seed_len_format,
                          number_of_chunks_len_format=number_of_chunks_len_format, rules=rules,
                          save_number_of_chunks_in_packet=False)
        x.prepare()
    else:
        raise NotImplementedError("Choose: RU10, LT or Online")
    i = 0
    tmp_list = []
    while i < while_count:
        if seq_seed is not None:
            if seq_seed + i >= packets_to_create:
                break
            packet = x.create_new_packet(seed=seq_seed + i)
        else:
            packet = x.create_new_packet()
        if i == 0:
            print("%i , %s" % (len(packet.get_dna_struct(True)), packet.get_dna_struct(True)))
        _ = should_drop_packet(rules, packet)
        if packet.error_prob <= drop_above and (len(tmp_list) < l_size
                                                or packet.error_prob < tmp_list[-1].error_prob):
            if packet not in tmp_list:
                bisect.insort_left(tmp_list, packet)
            else:
                elem = next((x for x in tmp_list if x == packet), None)
                if packet < elem:
                    tmp_list.remove(elem)
                    del elem
                    bisect.insort_left(tmp_list, packet)
            if len(tmp_list) > l_size:
                for ele1m in tmp_list[l_size + 1:]:
                    del ele1m
                tmp_list = tmp_list[:l_size]
        else:
            del packet
        i += 1
        # += operation is not atomic, so we need to get a lock:
        with counter.get_lock():
            counter.value += 1
    # save_packets_fasta(tmp_list, out_file=method + "_out_partial", file_ending="." + method + "_DNA",
    #                    clear_output=False)
    conf = {'error_correction': error_correction, 'repair_symbols': repair_symbols,
            'number_of_splits': _number_of_splits, 'find_minimum_mode': True, 'seq_seed': seq_seed}
    # x.save_config_file(conf, section_name=method + "_" + file)
    if x.progress_bar is not None:
        x.progress_bar.finish()
    return [ParallelPacket.from_packet(p) for p in tmp_list]

def run(seq_seed=None, file='logo.jpg', asdna=True, insert_header=False,
        error_correction=reed_solomon_encode, save_number_of_chunks_in_packet=False, l_size=1000,
        while_count=1000, chunk_size=0, number_of_chunks=300, prepend="", append="",
        seed_len_format=ID_LEN_FORMAT, drop_above=1.0):
    if chunk_size != 0:
        number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(file, chunk_size)
    dist = RaptorDistribution(number_of_chunks)
    dna_rules = FastDNARules()
    if asdna:
        rules = dna_rules
    else:
        rules = None
    x = RU10Encoder(file, number_of_chunks, dist, chunk_size=chunk_size,
                    insert_header=insert_header, rules=rules, error_correction=error_correction,
                    id_len_format=seed_len_format,
                    number_of_chunks_len_format=NUMBER_OF_CHUNKS_LEN_FORMAT,
                    save_number_of_chunks_in_packet=save_number_of_chunks_in_packet,
                    prepend=prepend, append=append)
    x.prepare()
    i = 0
    tmp_list = []
    while i < while_count:
        if seq_seed is not None:
            if seq_seed + i >= NUMBER_OF_PACKETS_TO_CREATE:
                break
            packet = x.create_new_packet(seed=seq_seed + i)
        else:
            packet = x.create_new_packet()
        should_drop_packet(rules, packet)
        if packet.error_prob <= drop_above and (len(tmp_list) < l_size
                                                or packet.error_prob < tmp_list[-1].error_prob):
            if packet not in tmp_list:
                bisect.insort_left(tmp_list, packet)
            else:
                elem = next((x for x in tmp_list if x == packet), None)
                if packet < elem:
                    tmp_list.remove(elem)
                    bisect.insort_left(tmp_list, packet)
            if len(tmp_list) > l_size:
                tmp_list = tmp_list[:l_size]
        i += 1
    print([x.error_prob for x in tmp_list])
    # e_correction, _repair_symbols and _number_of_splits are module-level settings in the
    # original script (e.g. parsed from the command line)
    conf = {'error_correction': e_correction, 'repair_symbols': _repair_symbols, 'asdna': asdna,
            'number_of_splits': _number_of_splits, 'find_minimum_mode': True, 'seq_seed': seq_seed}
    x.save_config_file(conf, section_name="RU10_" + file)
    return [ParallelPacket.from_packet(p) for p in tmp_list]

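# Minimal standalone sketch of the selection pattern used in both run() variants above:
# bisect.insort_left keeps the candidate list sorted (packets appear to compare by error
# probability), and trimming to l_size discards the worst candidates. Plain floats stand
# in for packets here.
import bisect


def keep_best(candidates, l_size=5):
    best = []
    for c in candidates:
        if len(best) < l_size or c < best[-1]:
            bisect.insort_left(best, c)
            best = best[:l_size]
    return best


assert keep_best([0.9, 0.1, 0.5, 0.3, 0.7, 0.2], l_size=3) == [0.1, 0.2, 0.3]
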
def test_windowed_gc_content(params):
    assert abs(DNARules.windowed_gc_content(params) - FastDNARules.windowed_gc_content(params)) == 0
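
# Hedged sketch: the rule tests above receive their inputs through a pytest `params`
# fixture/parametrization; the sequence below is illustrative only and no expected value
# is assumed, the test merely checks that both implementations agree.
import pytest


@pytest.mark.parametrize("params", [("ACGT" * 40,)])
def test_rules_agree_example(params):
    assert DNARules.illegal_symbols(params[0]) == FastDNARules.illegal_symbols(params[0])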