def save_vcf_sample_name_txt(self):
    """Make sure the VCF sample-name table file is in place.

    If ``self.vcf_sample_name_file`` already exists it is only handed to
    ``utl.save_to_tmpfile`` (the user may have edited it) and is not
    regenerated.  Otherwise the table is created: a ``#``-prefixed header
    row followed by the rows returned by ``self._pick_vcf_sample_list``
    for ``self.vcf_file_path``, one row per line.
    """
    name_file = self.vcf_sample_name_file

    if os.path.isfile(name_file):
        log.info("found.\n{}".format(name_file))
        # Keep a backup: the file may have been edited by the user.
        # NOTE(review): the meaning of the two True flags is defined by
        # utl.save_to_tmpfile -- confirm there.
        utl.save_to_tmpfile(name_file, True, True)
        return

    # Not there yet: read the VCF and pick the sample names.
    log.info("not found {}.".format(name_file))

    header_cols = ('no', 'group', 'nickname', 'basename', 'fullname')
    rows = ["#{}\t{}\t{}\t{}\t{}".format(*header_cols)]
    rows += self._pick_vcf_sample_list(self.vcf_file_path)

    # backup
    utl.save_to_tmpfile(name_file)

    with open(name_file, mode='w') as f:
        f.write("{}\n".format("\n".join(rows)))

    log.info("save.\n{}".format(name_file))
def _iterate_vcf(self, vcf_ittr, distin_dict, reg):
    """Write the variants that separate two groups to the variant file.

    Args:
        vcf_ittr: iterable of VCF records to examine.
        distin_dict: settings of the current comparison; keys ``0`` and
            ``1`` hold the two group names, ``'pick_mode'`` the variant
            filter and ``'variant'`` the output path / header / base name.
        reg: not used in this method.
    """
    pick_mode = distin_dict['pick_mode']

    # Keys 0 and 1 of the dict hold the group names (strings).
    gr_list = [distin_dict[0], distin_dict[1]]
    log.info("gr_list {}.".format(gr_list))

    # The genotype difference is checked first between the two samples
    # listed at the head of each group.
    top_smpl_list = [
        glv.conf.g_members_dict[group_name][0] for group_name in gr_list
    ]
    log.info("top_smpl_list {}.".format(top_smpl_list))

    start = time.time()

    variant_info = distin_dict['variant']
    out_txt_file = variant_info['out_path']
    utl.save_to_tmpfile(out_txt_file)

    # Open questions left by the original author: can this loop be
    # parallelised, and is a final flush needed after f.write?
    with open(out_txt_file, mode='a') as f:
        # header
        f.write("{}\n".format(variant_info['hdr_text']))

        for record in vcf_ittr:
            # 1. Skip records whose GT is the same in the two top samples.
            same_gt = self._skip_same_GT_between_top2sample(
                record, top_smpl_list)
            if same_gt > 0:
                continue

            # 2. Skip records whose GT differs inside a group.
            mixed_gt = self._skip_different_GT_in_own_group(
                record, top_smpl_list, gr_list)
            if mixed_gt > 0:
                continue

            # 3. Select the differing allele combinations (2x2 alleles).
            asel = AlleleSelect()
            asel.select_diff_allele(record, top_smpl_list, gr_list)

            # 4. Save the lines whose variant type matches pick_mode.
            f.writelines(
                "{}\n".format(line)
                for var_type, line in zip(asel.var_types, asel.lines)
                if utl.is_my_pick_mode(var_type, pick_mode) == True
            )

    elapsed = utl.elapsed_time(time.time(), start)
    log.info("variant {} {}".format(elapsed, variant_info['base_nam']))
def construct_primer(self):
    """Run the primer step for every entry of ``glv.outlist.distin_files``.

    For each entry the marker file is read with pandas, every marker row
    is passed to ``self._loop_primer3_check_blast`` together with the open
    output file, and the result file is then sorted with ``utl.sort_file``.
    Returns early when ``utl.progress_check('primer')`` is False.
    """
    # progress check
    if utl.progress_check('primer') == False:
        log.info("progress={} so skip primer.".format(glv.conf.progress))
        return

    log.info("Start processing {}".format('primer'))

    # one pass per distinguish-groups entry
    for distin_dict in glv.outlist.distin_files:

        df_distin = pd.read_csv(
            distin_dict['marker']['out_path'],
            sep='\t', header=0, index_col=None)

        out_txt_file = distin_dict['primer']['out_path']
        utl.save_to_tmpfile(out_txt_file)

        # No header line is written here.
        with open(out_txt_file, mode='a') as f:

            start = time.time()

            if glv.conf.parallel == True:
                log.info(
                    "do Parallel cpu {}, parallele {} blast {}".format(
                        glv.conf.thread,
                        glv.conf.parallel_blast_cnt,
                        glv.conf.blast_num_threads))

                # Thread backend: every job receives the same file object.
                run_jobs = Parallel(
                    n_jobs=glv.conf.parallel_blast_cnt,
                    backend="threading")
                run_jobs([
                    delayed(self._loop_primer3_check_blast)(
                        distin_dict, marker_df_row, f)
                    for marker_df_row in df_distin.itertuples()
                ])

            else:
                log.info("do Serial cpu {} / serial {} blast {}".format(
                    glv.conf.thread, 1, glv.conf.blast_num_threads))

                for marker_df_row in df_distin.itertuples():
                    self._loop_primer3_check_blast(
                        distin_dict, marker_df_row, f)

        utl.sort_file(
            'primer', distin_dict, out_txt_file,
            'chrom', 'pos', 'try_cnt', 'number')

        elapsed = utl.elapsed_time(time.time(), start)
        log.info("primer {} {}".format(
            elapsed, distin_dict['primer']['base_nam']))
def logging_start(self, mod_name, out_dir, log_dir):
    """Set the log file path in the logging config and open the logger.

    Args:
        mod_name: name passed on to ``LogConf.open_log``.
        out_dir: not used in this method.
        log_dir: directory in which ``vprimer_log.txt`` is placed.

    Returns:
        Whatever ``LogConf.open_log(mod_name)`` returns.
    """
    log_file_name = "{}/{}".format(log_dir, 'vprimer_log.txt')
    self.config['handlers']['fileHandler']['filename'] = log_file_name

    # Done before logging is opened.
    # NOTE(review): the False flag is interpreted by utl.save_to_tmpfile
    # -- confirm its meaning there.
    utl.save_to_tmpfile(log_file_name, False)

    return LogConf.open_log(mod_name)
def _copy_ini_file(self):
    """Copy the ini file (``self.ini_file_path``) into ``self.out_dir``.

    The destination keeps the ini file's base name.  Any file already at
    the destination is first passed to ``utl.save_to_tmpfile`` (back up),
    then the copy is done with ``cp`` through ``utl.try_exec``.

    Both paths are shell-quoted so that names containing spaces or shell
    metacharacters are handled as single arguments.  For ordinary paths
    ``shlex.quote`` returns the string unchanged, so the command text is
    the same as before.
    """
    import shlex

    ini_base = os.path.basename(self.ini_file_path)
    out_dir_ini_file = "{}/{}".format(self.out_dir, ini_base)

    # back up
    utl.save_to_tmpfile(out_dir_ini_file)

    cmd = "cp {} {}".format(
        shlex.quote(str(self.ini_file_path)),
        shlex.quote(out_dir_ini_file))
    utl.try_exec(cmd)
def _set_primer3_header_dict(self):
    """Build the primer3 header parameters from the parameter file.

    If ``self.p3_params_file_path`` exists it is backed up through
    ``utl.save_to_tmpfile`` (the user may have edited it); otherwise it is
    created from ``self.p3key`` as ``KEY=VALUE`` lines.  The file is then
    read: blank lines and lines starting with ``#`` are ignored, trailing
    comments are removed with ``utl.strip_hash_comment`` and each line is
    split at its first ``=``.

    ``PRIMER_PRODUCT_SIZE_RANGE`` and ``PRIMER_NUM_RETURN`` found in the
    file are ignored, and ``PRIMER_FIRST_BASE_INDEX`` is overwritten,
    because all three are set here to fixed / configured values.

    Returns:
        dict mapping primer3 tag names to string values.

    Raises:
        ValueError: a non-comment line contains no ``=``.
    """
    primer3_header_dict = dict()

    if os.path.isfile(self.p3_params_file_path):
        log.info("found {}.".format(self.p3_params_file_path))
        # This file may have been edited by the user, so copy it
        utl.save_to_tmpfile(self.p3_params_file_path, True, True)

    else:
        log.info("not found {}.".format(self.p3_params_file_path))
        with open(self.p3_params_file_path, mode='w') as f:
            f.write("{}={}\n".format('#PARAM', 'VALUE'))
            for key, value in self.p3key.items():
                f.write("{}={}\n".format(key, value))

    # Tags whose value is always decided by this method.
    overridden_tags = ('PRIMER_PRODUCT_SIZE_RANGE', 'PRIMER_NUM_RETURN')

    # open and read parameters
    with open(self.p3_params_file_path, mode='r') as f:
        for r_liner in f:
            r_line = r_liner.strip()    # cr, ws

            if r_line.startswith('#') or r_line == '':
                continue

            r_line = utl.strip_hash_comment(r_line)

            # Split at the first '=' only, so a value may itself
            # contain '='.
            vname, sep, value = r_line.partition('=')
            if not sep:
                raise ValueError(
                    "no '=' in primer3 parameter line {!r} of {}".format(
                        r_line, self.p3_params_file_path))

            if vname in overridden_tags:
                continue

            primer3_header_dict[vname] = value

    # constant value for primer3
    # PRIMER_FIRST_BASE_INDEX=1
    primer3_header_dict['PRIMER_FIRST_BASE_INDEX'] = str(1)

    # PRIMER_PRODUCT_SIZE_RANGE=???-???
    primer3_header_dict['PRIMER_PRODUCT_SIZE_RANGE'] = \
        "{}-{}".format(self.min_product_size, self.max_product_size)

    # PRIMER_NUM_RETURN=1
    primer3_header_dict['PRIMER_NUM_RETURN'] = str(1)

    return primer3_header_dict
def construct_primer(self):
    """Run the primer step for every entry of ``glv.outlist.distin_files``.

    ``utl.decide_action_stop("primer")`` decides what happens first:
    ``"stop"`` logs a message and exits with status 1, ``"gothrough"``
    logs a message and returns, anything else runs the step.  For each
    entry the marker file is read with pandas, every row is passed to
    ``self._loop_primer3_check_blast`` with the open output file, and the
    result file is sorted with ``utl.sort_file``.
    """
    proc_name = "primer"
    log.info("-------------------------------")
    log.info("Start processing {}\n".format(proc_name))

    # stop, action, gothrough
    ret_status = utl.decide_action_stop(proc_name)

    if ret_status == "stop":
        log.info((
            "STOP. "
            "Current process '{}' "
            "has exceeded the User-specified stop point "
            "'{}', "
            "so stop program. exit."
        ).format(proc_name, glv.conf.stop))
        sys.exit(1)

    if ret_status == "gothrough":
        log.info((
            "SKIP '{}' proc, "
            "glv.conf.progress = {}, "
            "glv.conf.stop = {}, "
            "so skip program."
        ).format(proc_name, glv.conf.progress, glv.conf.stop))
        return

    # one pass per distinguish-groups entry
    for proc_cnt, distin_dict in enumerate(glv.outlist.distin_files, 1):

        # logging current target
        utl.print_distin_info("primer", distin_dict, proc_cnt)

        df_distin = pd.read_csv(
            distin_dict['marker']['out_path'],
            sep='\t', header=0, index_col=None)

        out_txt_file = distin_dict['primer']['out_path']
        utl.save_to_tmpfile(out_txt_file)

        # No header line is written here.
        with open(out_txt_file, mode='a') as f:

            start = time.time()

            if glv.conf.parallel == True:
                log.info("do Parallel cpu {}, parallel {} blast {}".format(
                    glv.conf.thread,
                    glv.conf.parallel_blast_cnt,
                    glv.conf.blast_num_threads))

                # Thread backend: every job receives the same file object.
                run_jobs = Parallel(
                    n_jobs=glv.conf.parallel_blast_cnt,
                    backend="threading")
                run_jobs([
                    delayed(self._loop_primer3_check_blast)(
                        distin_dict, marker_df_row, f)
                    for marker_df_row in df_distin.itertuples()
                ])

            else:
                log.info("do Serial cpu {} / serial {} blast {}".format(
                    glv.conf.thread, 1, glv.conf.blast_num_threads))

                for marker_df_row in df_distin.itertuples():
                    self._loop_primer3_check_blast(
                        distin_dict, marker_df_row, f)

        utl.sort_file(
            'primer', distin_dict, out_txt_file,
            'chrom', 'pos', 'try_cnt', 'number')

        elapsed = utl.elapsed_time(time.time(), start)
        log.info("primer {} > {}.txt\n".format(
            elapsed, distin_dict['primer']['base_nam']))
def design_marker(self):
    """Run the marker step for every entry of ``glv.outlist.distin_files``.

    ``utl.decide_action_stop("marker")`` decides what happens first:
    ``"stop"`` logs a message and exits with status 1, ``"gothrough"``
    logs a message and returns, anything else runs the step.  For each
    entry the variant file is read with pandas, every row is passed to
    ``self._loop_evaluate_for_marker`` with the open output file, and the
    result file is sorted with ``utl.sort_file``.

    The rows are always processed serially (see the comment in the loop).
    """
    self.enzyme_name_list = glv.conf.enzyme_name_list

    proc_name = "marker"
    log.info("-------------------------------")
    log.info("Start processing {}\n".format(proc_name))

    # stop, action, gothrough
    ret_status = utl.decide_action_stop(proc_name)

    if ret_status == "stop":
        msg = "STOP. "
        msg += "Current process \'{}\' ".format(proc_name)
        msg += "has exceeded the User-specified stop point "
        msg += "\'{}', ".format(glv.conf.stop)
        msg += "so stop program. exit."
        log.info(msg)
        sys.exit(1)

    elif ret_status == "gothrough":
        msg = "SKIP \'{}\' proc, ".format(proc_name)
        msg += "glv.conf.progress = {}, ".format(glv.conf.progress)
        msg += "glv.conf.stop = {}, ".format(glv.conf.stop)
        msg += "so skip program."
        log.info(msg)
        return

    # Design a fragment sequence for primer3
    for proc_cnt, distin_dict in enumerate(glv.outlist.distin_files, 1):

        # logging current target
        utl.print_distin_info("marker", distin_dict, proc_cnt)

        # read variant file
        variant_file = distin_dict['variant']['out_path']
        log.info("variant_file {}".format(variant_file))

        df_distin = pd.read_csv(
            variant_file, sep='\t', header=0, index_col=None)

        # file name to write out result to text
        out_txt_file = distin_dict['marker']['out_path']
        utl.save_to_tmpfile(out_txt_file)

        start = time.time()

        with open(out_txt_file, mode='a') as f:
            # Parallel execution is deliberately not used here.
            # EvalVariant._check_effect_of_enzyme (eval_variant.py) uses
            # Bio.Restriction (RestrictionBatch / Analysis.with_sites,
            # http://biopython.org/DIST/docs/cookbook/Restriction.html),
            # and RestrictionBatch sometimes returned slightly inaccurate
            # results when executed in parallel.
            log.info("do Serial cpu 1")

            # each variant
            for variant_df_row in df_distin.itertuples():
                # Determine if the variant can be used as a marker.
                # For those that can be marked, prepare the
                # information for primer3.
                self._loop_evaluate_for_marker(
                    distin_dict, variant_df_row, f)

        utl.sort_file(
            'marker', distin_dict, out_txt_file,
            'chrom', 'pos', 'marker_info', 'string')

        log.info("marker {} > {}.txt\n".format(
            utl.elapsed_time(time.time(), start),
            distin_dict['marker']['base_nam']))
def out_current_settings(self):
    """Output the current settings to a file with config (ini format).

    The text starts with ``glv.ini_section``, a date comment and the full
    command line as a comment, followed by one ``name = value`` line for
    every ``self.conf_dict`` entry that has a ``'chosen'`` value.  A ``#``
    separator is appended after a fixed set of names.  The lines are
    passed through ``self._convert_setting_ini`` and written to
    ``self.curr_setting_file_path``; the main dictionaries are logged.
    """
    # names after which a "#" separator line is appended
    separator_after = (
        "ref", "stop", "product_size", "enzyme",
        "group_members", "blast_distance", "use_joblib_threading",
    )

    current_setting_ini = [
        "{}".format(glv.ini_section),                # [vprimer]
        "\n# {}".format(glv.now_datetime_form),      # date
        "\n# {}".format(' '.join(sys.argv)),         # whole command line
        "\n#",
    ]

    for vname, entry in self.conf_dict.items():
        if 'chosen' not in entry:
            continue

        current_setting_ini.append(
            "{} = {}".format(vname, entry['chosen']))

        if vname in separator_after:
            current_setting_ini.append("\n#")

    setting_path = self.curr_setting_file_path

    # exist or not
    if os.path.isfile(setting_path):
        # If the file exists, move it to bak
        log.info("found {}".format(setting_path))
        utl.save_to_tmpfile(setting_path)
    else:
        log.info("not found {}".format(setting_path))

    with open(setting_path, mode='w') as f:
        # Export while adjusting
        line = self._convert_setting_ini(current_setting_ini)
        f.write("{}\n".format(line))

    log.info("save {}".format(setting_path))

    # ====
    for label, value in (
            ("self.conf_dict", self.conf_dict),
            ("self.regions_dict", self.regions_dict),
            ("self.group_members_dict", self.group_members_dict),
            ("self.distinguish_groups_list", self.distinguish_groups_list)):
        log.info("{}=\n{}".format(label, pprint.pformat(value)))
def _iterate_vcf(self, vcf_ittr, distin_dict, proc_cnt):
    """Write the variants that separate two groups, with per-sample alleles.

    Args:
        vcf_ittr: iterable of VCF records to examine.
        distin_dict: settings of the current comparison; keys ``0`` and
            ``1`` hold the two group names, plus ``'region'``,
            ``'pick_mode'``, ``'indel_size'`` (``"min-max"``) and
            ``'variant'`` (output path / header / base name).
        proc_cnt: counter passed on to ``utl.print_distin_info``.
    """
    # basic information
    gr_list = [distin_dict[0], distin_dict[1]]
    reg = distin_dict['region']
    # NOTE(review): reg_dict is not used below; the lookup is kept so an
    # unknown region still fails here as before.
    reg_dict = glv.conf.regions_dict[reg]
    pick_mode = distin_dict['pick_mode']
    min_indel_len, max_indel_len = [
        int(bound) for bound in distin_dict['indel_size'].split('-')
    ]

    # The genotype difference is checked first between the two samples
    # listed at the head of each group.
    top_smpl_list = [
        glv.conf.group_members_dict[group_name][0]
        for group_name in gr_list
    ]

    # logging current target
    utl.print_distin_info("variant", distin_dict, proc_cnt)

    start = time.time()

    # file to export the variants to
    variant_info = distin_dict['variant']
    out_txt_file = variant_info['out_path']
    utl.save_to_tmpfile(out_txt_file)

    # One allele_int column is added per sample; members of the specified
    # groups come first:
    # gr0:s1 g0:s2 g0:s3 g1:s4 g1:s5 g1:s6 s7 s8 s9 s10
    nickname_ordered, fullname_ordered = \
        utl.get_ordered_sample_list(gr_list)

    sample_added_header = "{}\t{}".format(
        variant_info['hdr_text'], "\t".join(nickname_ordered))

    # Can I parallelize here?
    with open(out_txt_file, mode='a') as f:

        # header with the sample columns
        f.write("{}\n".format(sample_added_header))

        for record in vcf_ittr:

            # 1. Skip records whose GT is the same in the two top samples.
            same_gt = self._skip_same_GT_between_top2sample(
                record, top_smpl_list)
            if same_gt > 0:
                continue

            # 2. Skip records whose GT differs inside a group.
            mixed_gt = self._skip_different_GT_in_own_group(
                record, top_smpl_list, gr_list)
            if mixed_gt > 0:
                continue

            # 3. Select the differing allele combinations (2x2 alleles).
            asel = AlleleSelect(min_indel_len, max_indel_len)
            asel.select_diff_allele(record, top_smpl_list, gr_list)

            # allele_int text of the members of interest; built from the
            # record only when a line is actually written
            allele_int_line = ""

            # 4. Save variant information as text file
            for var_type, line in zip(asel.var_types, asel.lines):
                if not (utl.is_my_pick_mode(var_type, pick_mode) == True):
                    continue

                if allele_int_line == "":
                    allele_int_line = self._get_allele_line(
                        record, fullname_ordered)

                f.write("{}\t{}\n".format(line, allele_int_line))

    elapsed = utl.elapsed_time(time.time(), start)
    log.info("variant {} > {}.txt\n".format(
        elapsed, variant_info['base_nam']))
def print_allele(self):
    """Write the genotypes of all members to one text file per region.

    Used when show_genotype is specified.  For every region name in
    ``glv.conf.region_name_list`` the records fetched from the VCF are
    written tab-separated to
    ``<out_dir_path>/005_genotype~<region_name>~<show_genotype>.txt``:
    CHROM, POS, REF length, ALT lengths, length difference between REF and
    the first ALT, REF, ALT, then one converted genotype per member of
    ``glv.conf.group_members_dict['all']``.

    The VCF reader is closed when the method ends, also on error.
    """
    proc_name = "genotype"
    log.info("-------------------------------")
    log.info("Start processing {}\n".format(proc_name))

    members = glv.conf.group_members_dict['all']

    # header
    header = ["CHROM", "POS", "Rlen", "Alen", "diff", "REF", "ALT"]
    header += members

    # Full names of the members; the same for every region, so built once.
    sample_fullname_list = [
        utl.get_fullname(nickname) for nickname in members
    ]

    total_cnt = len(glv.conf.region_name_list)

    reader = vcfpy.Reader.from_path(glv.conf.vcf_file_path)
    try:
        # Save to file for each region
        for proc_cnt, region_name in enumerate(
                glv.conf.region_name_list, 1):

            region = glv.conf.regions_dict[region_name]['reg']

            # out file name
            outf_pref = "005_genotype"
            basename = "{}~{}~{}".format(
                outf_pref, region_name, glv.conf.show_genotype)
            out_file_path = "{}/{}.txt".format(
                glv.conf.out_dir_path, basename)

            # backup
            utl.save_to_tmpfile(out_file_path)

            log.info("")
            log.info("{} / {}, {}({}) > {}".format(
                proc_cnt, total_cnt, region_name, region, out_file_path))

            start = time.time()

            with open(out_file_path, mode='w') as f:
                f.write("{}\n".format('\t'.join(map(str, header))))

                for record in reader.fetch(region):
                    alt_list = [alt.value for alt in record.ALT]

                    # variant length and diff
                    len_ref = len(record.REF)
                    lens_alt_list = [len(alt) for alt in alt_list]
                    # NOTE(review): raises IndexError for a record with
                    # no ALT allele -- confirm such records cannot occur.
                    diff_len = abs(len_ref - lens_alt_list[0])

                    line = [
                        record.CHROM,
                        record.POS,
                        len_ref,
                        ",".join(map(str, lens_alt_list)),
                        diff_len,
                        record.REF,
                        ",".join(alt_list),
                    ]

                    for fn in sample_fullname_list:
                        call = record.call_for_sample[fn]
                        line.append(AlleleSelect.allele_convert(
                            "{}/{}".format(
                                call.gt_alleles[0], call.gt_alleles[1]),
                            glv.conf.show_genotype))

                    f.write("{}\n".format('\t'.join(map(str, line))))

            # basename, not out_file_path: the path already ends in
            # ".txt", which made the message end in ".txt.txt".
            log.info("genotype {} > {}.txt\n".format(
                utl.elapsed_time(time.time(), start), basename))
    finally:
        reader.close()
def format_text(self):
    """Write the formatted result files (formfail / formsafe).

    Returns early when both ``utl.progress_check('formsafe')`` and
    ``utl.progress_check('formfail')`` are False.  For every entry of
    ``glv.outlist.distin_files`` the primer file is read with pandas and
    split by its ``complete`` column (0 -> formfail, 1 -> formsafe).  Each
    part is written to its own file: a header line, then one line per
    row, taken from ``self.line`` after ``_prepare_from_primer_file`` and
    ``_format_product`` have run.
    """
    # progress check
    if utl.progress_check('formsafe') == False and \
            utl.progress_check('formfail') == False:
        log.info("progress={} so skip form.".format(glv.conf.progress))
        return

    log.info("Start processing {}".format('formsafe'))

    # value of the 'complete' column -> process name
    fail, safe = 0, 1
    targets = ((fail, 'formfail'), (safe, 'formsafe'))

    # one pass per distinguish-groups entry
    for distin_dict in glv.outlist.distin_files:

        df_distin = pd.read_csv(
            distin_dict['primer']['out_path'],
            sep='\t', header=0, index_col=None)

        for complete, proc in targets:
            log.info("{} {}".format(complete, proc))

            df_distin_complete = \
                df_distin[df_distin['complete'] == complete]

            # chrom/pos pairs that occur more than once:
            # {chrom: {pos: pos}}
            df_chrom_pos = df_distin_complete.loc[:, ['chrom', 'pos']]
            df_chrom_pos_duplicated = \
                df_chrom_pos[df_chrom_pos.duplicated()]

            duplicate_pos_dict = dict()
            for chrom, pos in df_chrom_pos_duplicated.itertuples(
                    index=False, name=None):
                duplicate_pos_dict.setdefault(
                    chrom, dict()).setdefault(pos, pos)

            # file name to write out result to text
            out_txt_file = distin_dict[proc]['out_path']
            log.info("out_txt_file={}.".format(out_txt_file))
            utl.save_to_tmpfile(out_txt_file)

            with open(out_txt_file, mode='a') as f:

                # The 'formsafe' header text is used for both files.
                f.write("{}\n".format(distin_dict['formsafe']['hdr_text']))

                for primer_df_row in df_distin_complete.itertuples():
                    self._prepare_from_primer_file(
                        primer_df_row, distin_dict)
                    self._format_product(duplicate_pos_dict)

                    # write out
                    f.write("{}\n".format(self.line))
def format_text(self):
    """Write the formatted result files (formfail / formsafe).

    For every entry of ``glv.outlist.distin_files`` the variant file is
    read into a ``{chrom: {pos: [...]}}`` lookup of allele columns, and
    the primer file is split by its ``complete`` column (0 -> formfail,
    1 -> formsafe).  For each part ``utl.decide_action_stop`` is asked
    first; on ``"stop"`` or ``"gothrough"`` a message is logged and the
    part is skipped.  Otherwise a header and one line per row are
    written; formsafe lines and header are extended with the allele
    columns looked up by chrom / pos.
    """
    fail, safe = 0, 1
    targets = ((fail, 'formfail'), (safe, 'formsafe'))

    # one pass per distinguish-groups entry
    for proc_cnt, distin_dict in enumerate(glv.outlist.distin_files, 1):

        # primer file (read further down)
        primer_file = distin_dict['primer']['out_path']

        # Read the variant file and keep its allele int information
        # in a dictionary.
        df_variant = pd.read_csv(
            distin_dict['variant']['out_path'],
            sep='\t', header=0, index_col=None)

        # Number of columns named in the variant header text (per the
        # original note, "not including REF,ALT").  Row tuples carry the
        # index at position 0, hence the +1.
        existing_column_cnt = len(
            distin_dict['variant']['hdr_text'].split("\t"))
        alint_start = existing_column_cnt + 1

        variant_alint_dict = dict()
        for variant_df_row in df_variant.itertuples():
            chrom_name = variant_df_row[1]
            pos = variant_df_row[2]

            alint_list = [variant_df_row[6]]
            alint_list += list(variant_df_row[alint_start:])

            variant_alint_dict.setdefault(
                chrom_name, dict())[pos] = alint_list

        df_distin = pd.read_csv(
            primer_file, sep='\t', header=0, index_col=None)

        for complete, proc in targets:

            # stop, action, gothrough
            proc_name = proc
            ret_status = utl.decide_action_stop(proc_name)

            if ret_status == "stop":
                # Unlike the other steps, this one only skips the part.
                log.info((
                    "STOP. "
                    "Current process '{}' "
                    "has exceeded the User-specified stop point "
                    "'{}', "
                    "so stop program. exit."
                ).format(proc_name, glv.conf.stop))
                continue

            if ret_status == "gothrough":
                log.info((
                    "SKIP '{}' proc, "
                    "glv.conf.progress = {}, "
                    "glv.conf.stop = {}, "
                    "so skip program."
                ).format(proc_name, glv.conf.progress, glv.conf.stop))
                continue

            log.info("-------------------------------")
            log.info("Start processing {} complete={}\n".format(
                proc_name, complete))

            # logging current target
            sub_proc = "{}_{}".format(proc, complete)
            utl.print_distin_info(sub_proc, distin_dict, proc_cnt, True)

            df_distin_complete = \
                df_distin[df_distin['complete'] == complete]

            # chrom/pos pairs that occur more than once:
            # {chrom: {pos: pos}}
            df_chrom_pos = df_distin_complete.loc[:, ['chrom', 'pos']]
            df_chrom_pos_duplicated = \
                df_chrom_pos[df_chrom_pos.duplicated()]

            duplicate_pos_dict = dict()
            for c_p_row in df_chrom_pos_duplicated.itertuples():
                duplicate_pos_dict.setdefault(
                    c_p_row[1], dict()).setdefault(c_p_row[2], c_p_row[2])

            # file name to write out result to text
            out_txt_file = distin_dict[proc]['out_path']
            log.info("out_txt_file={}.".format(out_txt_file))
            utl.save_to_tmpfile(out_txt_file)

            is_formsafe = (proc == "formsafe")

            with open(out_txt_file, mode='a') as f:

                # The 'formsafe' header text is used for both files.
                header = distin_dict['formsafe']['hdr_text']

                if is_formsafe:
                    nickname_ordered, fullname_ordered = \
                        utl.get_ordered_sample_list(
                            [distin_dict[0], distin_dict[1]])

                    alint_header = ["vseq_ano_str"]
                    alint_header += nickname_ordered
                    header = "{}\t{}".format(
                        header, "\t".join(alint_header))

                f.write("{}\n".format(header))

                for primer_df_row in df_distin_complete.itertuples():
                    chrom_name = primer_df_row[2]
                    pos = primer_df_row[3]

                    self._prepare_from_primer_file(
                        primer_df_row, distin_dict)
                    self._format_product(duplicate_pos_dict)

                    if is_formsafe:
                        alint_text = "\t".join(
                            map(str, variant_alint_dict[chrom_name][pos]))
                        line = "{}\t{}".format(self.line, alint_text)
                        f.write("{}\n".format(line))
                    else:
                        # write out
                        f.write("{}\n".format(self.line))
def design_marker(self):
    """Run the marker step for every entry of ``glv.outlist.distin_files``.

    Returns early when ``utl.progress_check('marker')`` is False.  For
    each entry the variant file is read with pandas, the enzyme file is
    read through ``self.enzyme.read_enzyme_file()``, every variant row is
    passed to ``self._loop_evaluate_for_marker`` with the open output file
    (threaded via joblib when ``glv.conf.parallel`` is True, otherwise
    serially), and the result file is sorted with ``utl.sort_file``.
    """
    # progress check
    if utl.progress_check('marker') == False:
        # The message names this step; it previously said "variant".
        log.info("progress={} so skip marker.".format(glv.conf.progress))
        return

    log.info("Start processing {}".format('marker'))

    # Create the fragments for primer3,
    # for each distinguish_groups
    for distin_dict in glv.outlist.distin_files:

        # read variant file
        variant_file = distin_dict['variant']['out_path']
        log.info("variant_file {}".format(variant_file))

        df_distin = pd.read_csv(
            variant_file, sep='\t', header=0, index_col=None)

        # Bio.Restriction.Restriction_Dictionary
        self.enzyme.read_enzyme_file()

        # file name to write out result to text
        out_txt_file = distin_dict['marker']['out_path']
        utl.save_to_tmpfile(out_txt_file)

        start = time.time()

        # No header line is written here.
        with open(out_txt_file, mode='a') as f:

            if glv.conf.parallel == True:
                log.info("do Parallel cpu {} parallel {}".format(
                    glv.conf.thread, glv.conf.parallele_full_thread))

                # Thread backend: every job receives the same file object.
                Parallel(
                    n_jobs=glv.conf.parallele_full_thread,
                    backend="threading")(
                    [
                        delayed(self._loop_evaluate_for_marker)
                        (distin_dict, variant_df_row, f)
                        for variant_df_row in df_distin.itertuples()
                    ]
                )

            else:
                log.info("do Serial cpu 1")

                # each variant
                for variant_df_row in df_distin.itertuples():
                    # Decide whether the variant can be used as a marker;
                    # for those that can, prepare the information for
                    # primer3.
                    self._loop_evaluate_for_marker(
                        distin_dict, variant_df_row, f)

        utl.sort_file(
            'marker', distin_dict, out_txt_file,
            'chrom', 'pos', 'marker_info', 'string')

        log.info("marker {} {}".format(
            utl.elapsed_time(time.time(), start),
            distin_dict['marker']['base_nam']))