def _list_source_files(source_dir, recursive, file_type):
    """Return the paths of the files under *source_dir* to process.

    When *recursive* is false only the files directly inside *source_dir*
    are considered. When *file_type* is not None, only file names ending
    with it are kept.
    """
    walker = os.walk(source_dir)
    if not recursive:
        # Keep only the top-level entry. The default avoids a bare
        # StopIteration when os.walk yields nothing (for example a missing
        # directory), matching recursive mode, which simply finds no files.
        walker = [next(walker, (source_dir, [], []))]

    src_files = []
    for dirpath, _dirnames, filenames in walker:
        for filename in filenames:
            if file_type is None or filename.endswith(file_type):
                src_files.append(os.path.join(dirpath, filename))
    return src_files


def _read_mapping(fs, mapping_file):
    """Parse *mapping_file* into a {source name: destination name} dict.

    *fs* is the project's FS helper; its ``read2list`` is assumed to return
    the lines of the file (TODO confirm). A line is used only when it splits
    into exactly two whitespace-separated fields; any other line is reported
    with a warning and skipped.
    """
    src2dest = {}
    content = fs.read2list(mapping_file)
    fs.close()
    for line in content:
        try:
            src_filename, dest_filename = line.split()
        except ValueError as ex:
            # Raised by the unpacking when the line does not have exactly
            # two fields (for example an empty line).
            logging.warning(
                "mapping src file to dest file [empty line?]: %s", ex)
        else:
            src2dest[src_filename] = dest_filename
    return src2dest


def main():
    """Copy or symlink files from the source directory into the output one.

    Destination names are chosen per source file as follows:

    * if ``args.re_list`` is given, every ``find:replace`` entry is applied
      in order to the file name with ``re.sub``;
    * otherwise, if ``args.mapping_file`` is given, the name is looked up in
      the mapping file and files that are not listed are skipped with a
      warning;
    * otherwise the original file name is kept.

    Each file is then copied with ``cp`` (``args.hard_copy``) or linked with
    ``ln -s``. The process exits with status 1 if a source file has
    disappeared before it could be handled.
    """
    # Function-scope import so this block does not depend on the module's
    # import list.
    import subprocess

    fs = FS()
    args = get_arguments()

    src_files = _list_source_files(
        args.source_dir, args.recursive, args.file_type)
    if not src_files:
        logging.warning("no matching files found in \"%s\"", args.source_dir)

    src2dest = {}
    if args.mapping_file is not None:
        src2dest = _read_mapping(fs, args.mapping_file)

    src_file2dest_file = {}
    for src_file in src_files:
        src_filename = os.path.basename(src_file)
        if args.re_list is not None:
            dest_filename = src_filename
            for regular_expression in args.re_list:
                find_str, replace_str = regular_expression.split(":")
                # Single-argument form prints the same text on Python 2 and 3.
                print("%s be replaced with: %s" % (find_str, replace_str))
                dest_filename = re.sub(find_str, replace_str, dest_filename)
        elif args.mapping_file is not None:
            if src_filename not in src2dest:
                logging.warning(
                    "\"%s\" is NOT listed in the mapping file!!!\n"
                    % src_filename)
                continue
            dest_filename = src2dest[src_filename]
        else:
            dest_filename = src_filename
        src_file2dest_file[src_file] = os.path.join(
            args.out_dir, dest_filename)

    for src_file, dest_file in sorted(src_file2dest_file.items()):
        if not os.path.exists(src_file):
            logging.error("\"%s\" does not exist!!!\n" % src_file)
            # Non-zero status so callers can tell the run failed.
            sys.exit(1)
        if args.hard_copy:
            cmd_line = "cp '%s' '%s'" % (src_file, dest_file)
            argv = ["cp", src_file, dest_file]
        else:
            cmd_line = "ln -s %s %s" % (src_file, dest_file)
            argv = ["ln", "-s", src_file, dest_file]
        logging.info(cmd_line)
        # Run the argument vector directly instead of handing cmd_line to a
        # shell: file names containing spaces, quotes or other shell
        # metacharacters are then passed through unchanged.
        status = subprocess.call(argv)
        if status != 0:
            logging.warning(
                "command exited with status %d: %s", status, cmd_line)
def main():
    """Build and launch a script that merges per-lane ``.gz`` files.

    Every ``.gz`` file below ``args.src_dir`` is collected. For each path
    containing ``_L001_`` the sibling paths for ``_L002_``, ``_L003_`` and
    ``_L004_`` are derived by substitution on the whole path; when all four
    were found, the command returned by ``z.cat_gz`` is written to
    ``<src_dir>/next_seq_merge/next_seq_merge_commands.sh`` with the merged
    output named after the lane-1 file without its lane tag. With
    ``args.fastq`` a ``gunzip`` line is added per merged file. The script is
    finally handed to ``qsub_cmd.py`` in the background.

    NOTE(review): ``fs.write`` is assumed to add the text to the named file
    and ``z.cat_gz`` to return a shell command line - confirm against the
    FS/ZIP helpers.
    """
    # Function-scope import that works on both Python 2 and Python 3.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    fs = FS()
    z = ZIP()
    args = get_arguments()
    src_dir = args.src_dir

    next_seq_merge_dir = os.path.join(src_dir, "next_seq_merge")
    if not os.path.exists(next_seq_merge_dir):
        # Single-argument form prints the same text on Python 2 and 3.
        print("%s is created!\n" % next_seq_merge_dir)
        os.makedirs(next_seq_merge_dir)
    next_seq_merge_command_sh = os.path.join(
        next_seq_merge_dir, "next_seq_merge_commands.sh")

    gz_file_list = []
    for root, _dirs, files in os.walk(src_dir, topdown=False):
        for filename in files:
            if filename.endswith(".gz"):
                gz_file_list.append(os.path.join(root, filename))
    gz_file_list.sort()

    if args.check_gz:
        z.test_gz(gz_file_list)

    # Set for constant-time membership tests in the lane loop below.
    known_gz_files = set(gz_file_list)
    other_lane_tags = ("_L002_", "_L003_", "_L004_")

    out_gz_list = []
    for gz_file in gz_file_list:
        if "_L001_" not in gz_file:
            continue
        to_cat_list = [gz_file]
        for lane_tag in other_lane_tags:
            to_cat_list.append(gz_file.replace("_L001_", lane_tag))

        missing = [path for path in to_cat_list
                   if path not in known_gz_files]
        for path in missing:
            print("CAN'T find %s" % path)
        if missing:
            # Only merge when all four lane files are present.
            continue

        out_gz = os.path.join(
            next_seq_merge_dir,
            os.path.basename(gz_file.replace("_L001_", "_")))
        fs.write(next_seq_merge_command_sh,
                 z.cat_gz(out_gz, to_cat_list) + "\n")
        out_gz_list.append(out_gz)
    fs.write(next_seq_merge_command_sh, "\n\n")

    if args.fastq:
        for out_gz in out_gz_list:
            # quote() leaves ordinary paths untouched and protects paths
            # containing spaces or shell metacharacters.
            fs.write(next_seq_merge_command_sh,
                     "gunzip " + quote(out_gz) + "\n")
    fs.write(next_seq_merge_command_sh, "\n\n")

    qsub_cmd = ("nohup qsub_cmd.py " + quote(next_seq_merge_command_sh)
                + " >/dev/null 2>&1 &")
    fs.write(next_seq_merge_command_sh, "\n#run this: " + qsub_cmd + "\n")
    fs.close()
    # The redirection and trailing "&" need a shell, so os.system is kept.
    os.system(qsub_cmd)