def main(readlib, cmapfile, matesfile, out_dir):
    """Parse the mates file and write one BED record per parsed fragment.

    Progress is appended to ``parse_mates.LOG`` inside ``out_dir``. Each
    fragment yielded by ``FragmentParser.parse_mates`` is written to the
    output handle registered for its chromosome.

    Args:
        readlib: Read-library label; forwarded to ``generate_fhs`` and
            ``parse_mates`` and echoed in the log.
        cmapfile: Passed to ``FragmentParser`` as ``CHRMAP``.
        matesfile: Mates file handed to ``parse_mates``.
        out_dir: Directory that receives the log file; also forwarded to
            ``generate_fhs`` (presumably where the per-chromosome outputs
            are created -- confirm against ``generate_fhs``).

    Raises:
        KeyError: If ``parse_mates`` yields a chromosome that has no entry
            in the handle mapping returned by ``generate_fhs``.
    """
    log_path = os.path.join(out_dir, 'parse_mates.LOG')
    # The context manager guarantees the log is closed even when parsing fails.
    with open(log_path, 'a') as LOGFH:
        LOGFH.write('Start the program to parse [%s] fragments for [%s] ... %s\n'
                    % (readlib, matesfile, str(datetime.datetime.now())))
        # Flush right away so the start marker is on disk before the long parse.
        LOGFH.flush()

        # Initiate the FragmentParser
        matesparser = FragmentParser(CHRMAP=cmapfile)

        # Parse and write to the output files organized by chromosome
        chrs = matesparser.chr_mapping.values()
        chrs_fh = generate_fhs(chrs, readlib, out_dir)
        try:
            # `chrom` rather than `chr` to avoid shadowing the builtin.
            for chrom, fragment in matesparser.parse_mates(matesfile, readlib):
                fout = chrs_fh[chrom]
                record = fragment.get_bedrecord(chrom)
                fout.write(record + '\n')
        finally:
            # Release the output handles on failure as well as on success.
            close_fhs(chrs_fh)

        LOGFH.write('Finish the program ... %s\n\n'
                    % str(datetime.datetime.now()))
def main(readlib, cmapfile, matesfile, out_dir):
    """Parse the mates file and write one BED record per parsed fragment.

    Progress is appended to ``parse_mates.LOG`` inside ``out_dir``. Each
    fragment yielded by ``FragmentParser.parse_mates`` is written to the
    output handle registered for its chromosome.

    Args:
        readlib: Read-library label; forwarded to ``generate_fhs`` and
            ``parse_mates`` and echoed in the log.
        cmapfile: Passed to ``FragmentParser`` as ``CHRMAP``.
        matesfile: Mates file handed to ``parse_mates``.
        out_dir: Directory that receives the log file; also forwarded to
            ``generate_fhs`` (presumably where the per-chromosome outputs
            are created -- confirm against ``generate_fhs``).

    Raises:
        KeyError: If ``parse_mates`` yields a chromosome that has no entry
            in the handle mapping returned by ``generate_fhs``.
    """
    log_path = os.path.join(out_dir, 'parse_mates.LOG')
    # The context manager guarantees the log is closed even when parsing fails.
    with open(log_path, 'a') as LOGFH:
        LOGFH.write(
            'Start the program to parse [%s] fragments for [%s] ... %s\n'
            % (readlib, matesfile, str(datetime.datetime.now())))
        # Flush right away so the start marker is on disk before the long parse.
        LOGFH.flush()

        # Initiate the FragmentParser
        matesparser = FragmentParser(CHRMAP=cmapfile)

        # Parse and write to the output files organized by chromosome
        chrs = matesparser.chr_mapping.values()
        chrs_fh = generate_fhs(chrs, readlib, out_dir)
        try:
            # `chrom` rather than `chr` to avoid shadowing the builtin.
            for chrom, fragment in matesparser.parse_mates(matesfile, readlib):
                fout = chrs_fh[chrom]
                record = fragment.get_bedrecord(chrom)
                fout.write(record + '\n')
        finally:
            # Release the output handles on failure as well as on success.
            close_fhs(chrs_fh)

        LOGFH.write('Finish the program ... %s\n\n'
                    % str(datetime.datetime.now()))
LOGFH.flush() paras = parse_paras(parafile) # 2. Build CpGs LOGFH.write('... Build CpG sites ... %s\n' % str(datetime.datetime.now())) siteparser = SiteParser(chr) siteparser.parse_sites(cpg_file, 'CpGFull') cpg_sites = siteparser.get_sites() # 3. Build RE sites first LOGFH.write('... Build RE sites ... %s\n' % str(datetime.datetime.now())) siteparser = SiteParser(chr) siteparser.parse_sites(re_sitefile, 'RE') re_sites = siteparser.get_sites() del siteparser # 4. Build RE fragments LOGFH.write('... Build RE fragments ... %s\n' % str(datetime.datetime.now())) fragparser = FragmentParser() # Split fragfile into subsets to avoid memory overflow re_linenum = count_file_lines(re_fragfile) refh = open(re_fragfile) re_fraglines = [] pre_count = 0 for count, line in enumerate(refh): re_fraglines.append(line) if (count % 10000 == 0 and count != 0) or count + 1 == re_linenum: re_frags = [fragment for fragment in fragparser.parse_bedfrags(re_fraglines, 'RE')] # 5. Filter RE fragments, cpg_sites are updated LOGFH.write('...... Filter RE fragments from lines [%d, %d] ......\n' % (pre_count, count)) refilter = REFilter(re_frags, cpg_sites, re_sites, paras['outlen'], paras['inlen']) refilter.scan() # Save information for the passed and failed RE fragments write_logfiles(out_dir, re_frags, paras, 're')