def main(args):
    """
    Split the input file into one output file per chromosome.

    The input file is read three times: once to skim the chromosome
    names and the line count, once to check the types and monotonicity,
    and once to write each converted row to the file of its chromosome.
    Each output file is named out_prefix + chromosome name + out_suffix
    and is placed in the output directory.
    @param args: positional and flaglike arguments; the attributes read
    are infile, outdir, force, out_prefix, out_suffix, and noheader
    @raise Exception: if the output directory does not exist and force
    is not set, or if an output file already exists and force is not set
    """
    # read the arguments
    input_filename = os.path.abspath(os.path.expanduser(args.infile))
    output_directory = os.path.abspath(os.path.expanduser(args.outdir))
    force = args.force
    # make sure that the output directory exists;
    # it is created only when the user asked to force the output
    if force and not os.path.isdir(output_directory):
        os.makedirs(output_directory)
    if not os.path.isdir(output_directory):
        msg = 'output directory does not exist: ' + output_directory
        raise Exception(msg)
    # scan the input file for chromosome names
    ch_paths = []
    skimmer = DGRP.ChromoSkimmer()
    with open(input_filename) as fin:
        for chromo_name in skimmer.skim(gen_untyped_rows(fin)):
            output_filename = args.out_prefix + chromo_name + args.out_suffix
            ch_path = os.path.join(output_directory, output_filename)
            ch_paths.append(ch_path)
            # refuse to clobber existing output unless forced
            if not force and os.path.exists(ch_path):
                raise Exception('output already exists: ' + ch_path)
    chromo_names = skimmer.name_list
    nlines = skimmer.linecount
    # start the progress bar;
    # it is sized for two passes because both the checking pass
    # and the writing pass increment it
    nticks = 2 * nlines
    pbar = Progress.Bar(nticks)
    # scan the input file for correct types and for monotonicity
    with open(input_filename) as fin:
        for _ in DGRP.check_chromo_monotonicity(gen_typed_rows(fin)):
            pbar.increment()
    # create the files open for writing;
    # the finally clause closes every file that was opened,
    # even if opening a later file or writing a row fails
    ch_files = []
    try:
        for p in ch_paths:
            ch_files.append(open(p, 'wt'))
        # write the headers
        if not args.noheader:
            for f in ch_files:
                f.write(g_header + '\n')
        # write the lines;
        # ch_paths and therefore ch_files are assumed to be in the
        # same order as the skimmer name list -- TODO confirm in DGRP
        name_to_file = dict(zip(chromo_names, ch_files))
        with open(input_filename) as fin:
            for row in gen_typed_rows(fin):
                name = row[0]
                row_out = convert_row(row)
                f = name_to_file[name]
                line_out = '\t'.join(str(x) for x in row_out)
                f.write(line_out + '\n')
                pbar.increment()
    finally:
        # close the files
        for f in ch_files:
            f.close()
def get_response_content(fs):
    """
    Build a text preview of the converted rows of the first chromosome.

    The text in fs.data_in is scanned three times: once to skim the
    chromosome names, once to check the formatting and monotonicity,
    and once to write the converted rows of the first chromosome.
    @param fs: an object whose data_in attribute holds the input text
    @return: a summary line, a blank line, the header, and then one
    tab separated line per converted row of the first chromosome
    """
    # quickly skim the lines to get the chromosome names;
    # the loop body is empty because the skimmer records what it sees
    skimmer = DGRP.ChromoSkimmer()
    for _name in skimmer.skim(gen_untyped_rows(StringIO(fs.data_in))):
        pass
    chromo_names = skimmer.name_list
    # check formatting and monotonicity;
    # the checker is only driven to exhaustion here, presumably so that
    # it can raise on bad input -- TODO confirm in DGRP
    fin = StringIO(fs.data_in)
    for _item in DGRP.check_chromo_monotonicity(gen_typed_rows(fin)):
        pass
    # begin writing;
    # explicit write calls give the same text as the print statement
    # while also working under the python 3 print function
    out = StringIO()
    nchromos = len(chromo_names)
    out.write('writing the first of ' + str(nchromos) + ' chromosomes:\n')
    out.write('\n')
    # write only the first chromosome;
    # with no chromosome names nothing matches and only the header is written
    first_name = chromo_names[0] if chromo_names else None
    out.write(g_header + '\n')
    for row in gen_typed_rows(StringIO(fs.data_in)):
        if row[0] == first_name:
            line_out = '\t'.join(str(x) for x in convert_row(row))
            out.write(line_out + '\n')
    return out.getvalue()