def run_main(chunk_json, contigset_output, chunk_key):
    """Gather chunked consensus isoform FASTA files into a single FASTA,
    then write a contigset XML wrapping the combined FASTA.

    Parameters:
      chunk_json -- chunk.json describing the chunked tasks
      contigset_output -- output contigset path; must end with 'xml'
      chunk_key -- key under which chunked FASTA paths are stored
    """
    chunks = load_pipeline_chunks_from_json(chunk_json)

    # Allow looseness: accept chunk_key with or without the '$chunk.' prefix.
    if not chunk_key.startswith('$chunk.'):
        chunk_key = '$chunk.' + chunk_key
        # log.warn is a deprecated alias; use log.warning.
        log.warning("Prepending chunk key with '$chunk.' to '%s'", str(chunk_key))

    fasta_files = get_datum_from_chunks_by_chunk_key(chunks, chunk_key)
    log.debug("Chunked consensus isoforms files are %s.", (', '.join(fasta_files)))

    out_fa = CombinedFiles(
        combined_dir=op.dirname(contigset_output)).all_consensus_isoforms_fa
    combine_consensus_isoforms(split_indices=range(len(fasta_files)),
                               split_files=fasta_files,
                               combined_consensus_isoforms_fa=out_fa)
    log.info("Combining files to %s.", out_fa)

    log.info("Writing contigset %s", contigset_output)
    # Validate explicitly rather than via assert, which is stripped under -O.
    if not contigset_output.endswith('xml'):
        raise ValueError("Expected contigset output file ending with 'xml', "
                         "got %s" % contigset_output)
    as_contigset(out_fa, contigset_output)
    return 0
def run_main(fastq_file, gmap_ref_file, output_json_file, max_nchunks):
    """Scatter an HQ isoforms FASTQ into at most max_nchunks chunks and
    write a chunk.json pairing each FASTQ chunk with the GMAP reference.

    Parameters:
      fastq_file -- HQ isoforms in FASTQ
      gmap_ref_file -- GMAP reference set xml
      output_json_file -- chunk.json to write
      max_nchunks -- maximum number of chunks to produce
    """
    # Check size of fastq_file before scattering, so that a meaningful
    # error message can be displayed instead of 'float division by zero'.
    # (Same guard as the sibling scatter tool in this file.)
    if op.getsize(fastq_file) == 0:
        raise IOError("Fastq file %s is empty, exiting." % fastq_file)

    # Chunk FASTQ
    output_fastq_json = output_json_file + ".fastq.json"
    output_dir = op.dirname(output_json_file)
    CU.write_fastq_chunks_to_file(output_fastq_json, fastq_file, max_nchunks,
                                  output_dir, "scattered-fastq", "fastq")

    # get fastq_ids from output_fastq_json
    fastq_chunks = load_pipeline_chunks_from_json(output_fastq_json)
    fastq_files = get_datum_from_chunks_by_chunk_key(fastq_chunks,
                                                     "$chunk.fastq_id")
    log.debug("Chunked FASTQ files are %s.", (', '.join(fastq_files)))

    # Writing chunk.json: one chunk per FASTQ file, each also carrying
    # the (shared) GMAP reference.
    chunks = []
    for i, fastq_file in enumerate(fastq_files):
        chunk_id = "_".join(["map_isoforms_to_genome_chunk", str(i)])
        d = {Constants.CHUNK_KEYS[0]: fastq_file,
             Constants.CHUNK_KEYS[1]: gmap_ref_file}
        c = PipelineChunk(chunk_id, **d)
        chunks.append(c)

    log.info("Writing chunk.json to %s", output_json_file)
    write_pipeline_chunks(chunks, output_json_file,
                          "created by %s" % Constants.TOOL_ID)
    return 0
def run_main(chunk_json, fasta_output, chunk_key):
    """Gather chunked FASTA files listed in chunk_json (under chunk_key)
    into a single FASTA at fasta_output.

    Parameters:
      chunk_json -- chunk.json describing the chunked tasks
      fasta_output -- combined output FASTA path
      chunk_key -- key under which chunked FASTA paths are stored
    """
    chunks = load_pipeline_chunks_from_json(chunk_json)

    # Allow looseness: accept chunk_key with or without the '$chunk.' prefix.
    if not chunk_key.startswith('$chunk.'):
        chunk_key = '$chunk.' + chunk_key
        # log.warn is a deprecated alias; use log.warning.
        log.warning("Prepending chunk key with '$chunk.' to '{c}'".format(c=chunk_key))

    fastx_files = get_datum_from_chunks_by_chunk_key(chunks, chunk_key)
    _ = gather_fasta(fastx_files, fasta_output)
    return 0
def run_main(chunk_json, fasta_output, chunk_key):
    """Gather chunked FASTA files listed in chunk_json (under chunk_key)
    into a single FASTA at fasta_output.

    Parameters:
      chunk_json -- chunk.json describing the chunked tasks
      fasta_output -- combined output FASTA path
      chunk_key -- key under which chunked FASTA paths are stored
    """
    chunks = load_pipeline_chunks_from_json(chunk_json)

    # Allow looseness: accept chunk_key with or without the '$chunk.' prefix.
    if not chunk_key.startswith('$chunk.'):
        chunk_key = '$chunk.' + chunk_key
        # log.warn is a deprecated alias; use log.warning.
        log.warning(
            "Prepending chunk key with '$chunk.' to '{c}'".format(c=chunk_key))

    fastx_files = get_datum_from_chunks_by_chunk_key(chunks, chunk_key)
    _ = gather_fasta(fastx_files, fasta_output)
    return 0
def chunk_contigset(in_file, n_chunks, out_dir, out_chunk_json):
    """Split the input contigset into n_chunks chunks under out_dir.

    Chunk metadata is written to out_chunk_json; the list of chunked
    contigset file paths is read back from it and returned.
    """
    log.info("Splitting %s into %s chunks", in_file, str(n_chunks))
    CU.write_contigset_chunks_to_file(out_chunk_json, in_file, n_chunks,
                                      out_dir, "scattered-nfl",
                                      "contigset.xml")

    # Recover the chunked file paths from the chunk json just written.
    loaded = load_pipeline_chunks_from_json(out_chunk_json)
    files = get_datum_from_chunks_by_chunk_key(loaded, '$chunk.contigset_id')
    log.info("Splitted files are %s\n", ("\n".join(files)))
    return files
def run_main(chunk_json, fofn_output, chunk_key):
    """Gather chunked FOFN files listed in chunk_json (under chunk_key)
    by concatenating their contents into fofn_output.

    Parameters:
      chunk_json -- chunk.json describing the chunked tasks
      fofn_output -- combined output FOFN path
      chunk_key -- key under which chunked FOFN paths are stored
    """
    # Work inside the output directory so any relative paths in the
    # chunked FOFNs are resolved consistently.
    with cd(os.path.dirname(fofn_output)):
        chunks = load_pipeline_chunks_from_json(chunk_json)

        # Allow looseness: accept chunk_key with or without '$chunk.'.
        if not chunk_key.startswith('$chunk.'):
            chunk_key = '$chunk.' + chunk_key
            # log.warn is a deprecated alias; use log.warning.
            log.warning("Prepending chunk key with '$chunk.' to '{c}'".format(c=chunk_key))

        fofn_files = get_datum_from_chunks_by_chunk_key(chunks, chunk_key)
        # Replaced a leftover debug print (which polluted stdout) with a
        # debug-level log message.
        log.debug("fofn_files:%s %s", repr(fofn_files), repr(fofn_output))

        # Combine all into one.
        with open(fofn_output, 'w') as ofs:
            for fn in fofn_files:
                with open(fn) as ifs:
                    ofs.write(ifs.read())
    # Return 0 for consistency with the other gather run_main entry points.
    return 0
def run_main(chunk_json, sam_output, chunk_key):
    """Gather chunked SAM files listed in chunk_json (under chunk_key)
    by concatenating them into sam_output.

    Parameters:
      chunk_json -- chunk.json describing the chunked tasks
      sam_output -- combined output SAM path
      chunk_key -- key under which chunked SAM paths are stored
    """
    chunks = load_pipeline_chunks_from_json(chunk_json)

    # Allow looseness: accept chunk_key with or without the '$chunk.' prefix.
    if not chunk_key.startswith('$chunk.'):
        chunk_key = '$chunk.' + chunk_key
        # log.warn is a deprecated alias; use log.warning.
        log.warning("Prepending chunk key with '$chunk.' to '%s'", str(chunk_key))

    sam_files = get_datum_from_chunks_by_chunk_key(chunks, chunk_key)
    log.debug("Chunked SAM files are %s.", (', '.join(sam_files)))

    log.info("Concatenate chunked SAM files to %s.", sam_output)
    concatenate_sam(sam_files, sam_output)
    return 0
def run_main(fastq_file, gmap_ref_file, output_json_file, max_nchunks):
    """
    Parameters:
      fastq_file -- HQ isoforms in FASTQ
      gmap_ref_file -- GMAP reference set xml
      output_json -- chunk.json
    """
    # Fail early on an empty FASTQ so the user sees a clear message
    # instead of a downstream 'float division by zero'.
    if os.stat(fastq_file).st_size == 0:
        raise IOError("Fastq file %s is empty, exiting." % fastq_file)

    # Scatter the FASTQ into at most max_nchunks pieces.
    output_fastq_json = output_json_file + ".fastq.json"
    output_dir = op.dirname(output_json_file)
    CU.write_fastq_chunks_to_file(output_fastq_json, fastq_file, max_nchunks,
                                  output_dir, "scattered-fastq", "fastq")

    # Read the chunked FASTQ paths back out of the intermediate json.
    fastq_chunks = load_pipeline_chunks_from_json(output_fastq_json)
    fastq_files = get_datum_from_chunks_by_chunk_key(fastq_chunks,
                                                     "$chunk.fastq_id")
    log.debug("Chunked FASTQ files are %s.", (', '.join(fastq_files)))

    # Build one PipelineChunk per FASTQ piece, each paired with the
    # shared GMAP reference, then write chunk.json.
    chunks = [
        PipelineChunk("_".join(["map_isoforms_to_genome_chunk", str(idx)]),
                      **{Constants.CHUNK_KEYS[0]: fq,
                         Constants.CHUNK_KEYS[1]: gmap_ref_file})
        for idx, fq in enumerate(fastq_files)
    ]

    log.info("Writing chunk.json to %s", output_json_file)
    write_pipeline_chunks(chunks, output_json_file,
                          "created by %s" % Constants.TOOL_ID)
    return 0
def run_main(fastq_file, gmap_ref_file, output_json_file, max_nchunks):
    """
    Parameters:
      fastq_file -- HQ isoforms in FASTQ
      gmap_ref_file -- GMAP reference set xml
      output_json -- chunk.json
    """
    # Guard against an empty input so the failure is explicit rather
    # than a downstream 'float division by zero'.
    if os.stat(fastq_file).st_size == 0:
        raise IOError("Fastq file %s is empty, exiting." % fastq_file)

    output_dir = op.dirname(output_json_file)
    output_fastq_json = output_json_file + ".fastq.json"

    # Scatter the FASTQ and record the pieces in an intermediate json.
    CU.write_fastq_chunks_to_file(output_fastq_json, fastq_file, max_nchunks,
                                  output_dir, "scattered-fastq", "fastq")

    # Pull the chunked FASTQ paths back out.
    fastq_files = get_datum_from_chunks_by_chunk_key(
        load_pipeline_chunks_from_json(output_fastq_json), "$chunk.fastq_id")
    log.debug("Chunked FASTQ files are %s.", (', '.join(fastq_files)))

    # Pair every FASTQ piece with the shared GMAP reference and emit
    # the final chunk.json.
    chunks = []
    for index, chunk_fq in enumerate(fastq_files):
        datum = {Constants.CHUNK_KEYS[0]: chunk_fq,
                 Constants.CHUNK_KEYS[1]: gmap_ref_file}
        chunks.append(
            PipelineChunk("_".join(["map_isoforms_to_genome_chunk",
                                    str(index)]), **datum))

    log.info("Writing chunk.json to %s", output_json_file)
    write_pipeline_chunks(chunks, output_json_file,
                          "created by %s" % Constants.TOOL_ID)
    return 0
def run_main(chunk_json, sam_output, chunk_key):
    """Gather chunked SAM files listed in chunk_json (under chunk_key):
    concatenate them, sort the result, and write it to sam_output.

    Parameters:
      chunk_json -- chunk.json describing the chunked tasks
      sam_output -- combined, sorted output SAM path
      chunk_key -- key under which chunked SAM paths are stored
    """
    chunks = load_pipeline_chunks_from_json(chunk_json)

    # Allow looseness: accept chunk_key with or without the '$chunk.' prefix.
    if not chunk_key.startswith('$chunk.'):
        chunk_key = '$chunk.' + chunk_key
        # log.warn is a deprecated alias; use log.warning.
        log.warning("Prepending chunk key with '$chunk.' to '%s'", str(chunk_key))

    sam_files = get_datum_from_chunks_by_chunk_key(chunks, chunk_key)
    log.debug("Chunked SAM files are %s.", (', '.join(sam_files)))

    log.info("Concatenate chunked SAM files to %s.", sam_output)
    # concatenate sam files into an intermediate, then sort into place
    unsorted_sam_output = sam_output + ".unsorted.sam"
    concatenate_sam(sam_files, unsorted_sam_output)
    sort_sam(unsorted_sam_output, sam_output)
    # remove intermediate file
    rmpath(unsorted_sam_output)
    return 0
def run(chunk_input_json, output_file, chunk_key):
    """Gather chunked nfl pickles named in chunk_input_json (under
    chunk_key) into a single combined pickle at output_file."""
    loaded_chunks = load_pipeline_chunks_from_json(chunk_input_json)
    pickle_files = get_datum_from_chunks_by_chunk_key(loaded_chunks,
                                                      chunk_key)
    combine_nfl_pickles(pickle_files, output_file)
    return 0