def run(in_file, ref, blastn_config, config):
    """Blast ``in_file`` against a reference and write the hits with a header.

    The reference file and a nucleotide ("nucl") blast database are prepared
    first, then ``blast_search`` is run with a temporary output path. The
    rows of the file it returns are copied, tab-delimited, into the final
    output file underneath a header row built from ``HEADER_FIELDS``.

    Args:
        in_file: Query file handed to ``blast_search``; its basename is used
            to name the output file.
        ref: Object exposing ``get("name")``; also passed to
            ``prepare_ref_file``. The name is concatenated with
            ``"hits.tsv"``, so it is presumably a string -- confirm.
        blastn_config: Passed to ``build_results_dir`` to locate the
            results directory.
        config: Passed to ``prepare_ref_file`` and ``build_results_dir``.

    Returns:
        Path of the tab-delimited hits file that was written.
    """
    logger.info("Preparing the reference file for %s." % ref.get("name"))
    reference_path = prepare_ref_file(ref, config)

    logger.info("Preparing the blast database for %s." % ref.get("name"))
    database = prepare_blast_db(reference_path, "nucl")

    logger.info("Blasting %s against %s." % (in_file, ref.get("name")))
    hits_dir = build_results_dir(blastn_config, config)
    utils.safe_makedir(hits_dir)
    hits_name = replace_suffix(os.path.basename(in_file),
                               ref.get("name") + "hits.tsv")
    out_file = os.path.join(hits_dir, hits_name)
    raw_hits = blast_search(in_file, database, out_file + ".tmp")

    # NOTE: filtering hits by covered sequence length
    # (filter_results_by_length) is not applied here.

    # Copy the raw results verbatim, prepending the header row.
    with open(raw_hits) as raw_handle:
        rows = csv.reader(raw_handle, delimiter="\t")
        with open(out_file, "w") as hits_handle:
            hits_writer = csv.writer(hits_handle, delimiter="\t")
            hits_writer.writerow(HEADER_FIELDS.split(" "))
            hits_writer.writerows(rows)
    return out_file
def _build_output_prefix(input_file, jellyfish_config, config):
    """Return the output prefix for ``input_file`` inside the results dir.

    Args:
        input_file: Path whose suffix is replaced with ``"count"`` via
            ``replace_suffix``.
        jellyfish_config: Passed to ``build_results_dir``.
        config: Passed to ``build_results_dir``.

    Returns:
        The results directory joined with the re-suffixed ``input_file``.
    """
    # NOTE(review): input_file is joined as given (no os.path.basename), so
    # an absolute input path makes os.path.join drop the results directory.
    # Confirm callers only pass bare or relative file names.
    return os.path.join(
        build_results_dir(jellyfish_config, config),
        replace_suffix(input_file, "count"),
    )
def run(in_file, ref, blastn_config, config):
    """Blast ``in_file`` against a reference and write the hits with a header.

    The reference file and a nucleotide ("nucl") blast database are prepared
    first, then ``blast_search`` is run with a temporary output path. The
    rows of the file it returns are copied, tab-delimited, into the final
    output file underneath a header row built from ``HEADER_FIELDS``.

    Args:
        in_file: Query file handed to ``blast_search``; its basename is used
            to name the output file.
        ref: Object exposing ``get("name")``; also passed to
            ``prepare_ref_file``. The name is concatenated with
            ``"hits.tsv"``, so it is presumably a string -- confirm.
        blastn_config: Passed to ``build_results_dir`` to locate the
            results directory.
        config: Passed to ``prepare_ref_file`` and ``build_results_dir``.

    Returns:
        Path of the tab-delimited hits file that was written.
    """
    logger.info("Preparing the reference file for %s." % ref.get("name"))
    reference_path = prepare_ref_file(ref, config)

    logger.info("Preparing the blast database for %s." % ref.get("name"))
    database = prepare_blast_db(reference_path, "nucl")

    logger.info("Blasting %s against %s." % (in_file, ref.get("name")))
    hits_dir = build_results_dir(blastn_config, config)
    utils.safe_makedir(hits_dir)
    hits_name = replace_suffix(os.path.basename(in_file),
                               ref.get("name") + "hits.tsv")
    out_file = os.path.join(hits_dir, hits_name)
    raw_hits = blast_search(in_file, database, out_file + ".tmp")

    # NOTE: filtering hits by covered sequence length
    # (filter_results_by_length) is not applied here.

    # Copy the raw results verbatim, prepending the header row.
    with open(raw_hits) as raw_handle:
        rows = csv.reader(raw_handle, delimiter="\t")
        with open(out_file, "w") as hits_handle:
            hits_writer = csv.writer(hits_handle, delimiter="\t")
            hits_writer.writerow(HEADER_FIELDS.split(" "))
            hits_writer.writerows(rows)
    return out_file
def _build_output_file(input_file, novoalign_config, config):
    """Return the ``sam`` output path for ``input_file`` in the results dir.

    ``safe_makedir`` is called on the results directory before the path is
    built.

    Args:
        input_file: Path whose suffix is replaced with ``"sam"``; only the
            basename of the result is kept.
        novoalign_config: Passed to ``build_results_dir``.
        config: Passed to ``build_results_dir``.

    Returns:
        The results directory joined with the re-suffixed basename.
    """
    results_dir = build_results_dir(novoalign_config, config)
    safe_makedir(results_dir)
    sam_name = os.path.basename(replace_suffix(input_file, "sam"))
    return os.path.join(results_dir, sam_name)