def script__test_ssi_stamper(sample, sample_yaml, sample_component):
    """Run the SSI stamper on a sample and persist the resulting stamp.

    Stores the stamper's summary/results on the sample component, then
    appends the stamp (linked to the component's ``_id``) to the sample's
    ``stamps`` list and saves the sample back to ``sample_yaml``.
    Returns 0 on completion.
    """
    comps = get_components(sample)
    # .get() yields None when the key is absent, matching the original
    # explicit if/else around load_species(None).
    species = datahandling.load_species(
        sample["properties"].get("detected_species"))
    results, summary, stamp = stamps.ssi_stamp.test(
        comps['whats_my_species'], comps['qcquickie'],
        comps['assemblatron'], species, sample)
    component_db = datahandling.load_sample_component(sample_component)
    component_db["summary"] = summary
    component_db["results"] = results
    datahandling.save_sample_component(component_db, sample_component)
    # Reload so the freshly assigned _id is available for the stamp link.
    component_db = datahandling.load_sample_component(sample_component)
    stamp["_sample_component"] = component_db["_id"]
    existing_stamps = sample.get("stamps", [])
    existing_stamps.append(stamp)
    sample["stamps"] = existing_stamps
    datahandling.save_sample(sample, sample_yaml)
    return 0
def script__test_ssi_stamper(sample, sample_yaml, sample_component, log_err):
    """Run the SSI stamper on a sample, recording errors to ``log_err``.

    Stores the stamper's summary/results on the sample component, then
    records the stamp both in the sample's ``stamps.stamp_list`` and under
    ``stamps[<stamp name>]`` for direct lookup, and saves the sample.

    Returns 0 on success; on any failure the traceback is written to the
    ``log_err`` file and the process exits with status 1.
    """
    # Generic error handling to redirect output to the stderr log file.
    try:
        comps = get_components(sample)
        # .get() yields None when absent, matching the original if/else
        # around load_species(None).
        species = datahandling.load_species(
            sample["properties"].get("detected_species"))
        results, summary, stamp = stamps.ssi_stamp.test(
            comps['whats_my_species'], comps['assemblatron'], species, sample)
        datadump_dict = datahandling.load_sample_component(sample_component)
        datadump_dict["summary"] = summary
        datadump_dict["results"] = results
        datahandling.save_sample_component(datadump_dict, sample_component)
        # Get the _id back
        datadump_dict = datahandling.load_sample_component(sample_component)
        stamp["_sample_component"] = datadump_dict["_id"]
        # NOTE(review): assumes sample["stamps"] is a dict; samples written
        # by the older list-based stamper version would break here — confirm
        # no such samples remain.
        stamp_dict = sample.get("stamps", {})
        stamp_list = stamp_dict.get("stamp_list", [])
        stamp_list.append(stamp)
        stamp_dict["stamp_list"] = stamp_list
        stamp_dict[stamp["name"]] = stamp
        sample["stamps"] = stamp_dict
        datahandling.save_sample(sample, sample_yaml)
        return 0
    except Exception:
        datahandling.log(log_err, str(traceback.format_exc()))
        # BUGFIX: the builtin exit() is installed by the `site` module and is
        # not guaranteed in all interpreter configurations; raising SystemExit
        # directly is the equivalent, always-available form.
        raise SystemExit(1)
def script__datadump_assemblatron(folder, sample):
    """Extract assemblatron outputs from ``folder`` into the sample component.

    Each (relative path, extractor) pair below is run through
    ``datahandling.datadump_template`` in order; the accumulated summary
    and results are then saved back.  Returns 0.
    """
    folder = str(folder)
    sample = str(sample)
    data_dict = datahandling.load_sample_component(sample)
    data_dict.setdefault("summary", {})
    data_dict.setdefault("results", {})
    extractions = (
        ("contigs.sum.cov", extract_contigs_sum_cov),
        ("contigs.bin.cov", extract_contigs_bin_cov),
        ("log/setup__filter_reads_with_bbduk.err.log", extract_bbuk_log),
        ("quast/report.tsv", extract_quast_report),
        ("contigs.variants", extract_contig_variants),
        ("contigs.stats", extract_contig_stats),
        ("contigs.sketch", extract_contig_sketch),
    )
    for relative_path, extractor in extractions:
        data_dict = datahandling.datadump_template(
            data_dict, folder, relative_path, extractor)
    datahandling.save_sample_component(data_dict, sample)
    return 0
def script__datadump_whats_my_species(folder, sample):
    """Extract species-detection outputs into the sample component.

    Runs the bbduk log, bracken, and kraken report extractors in order,
    combines their values, and saves the component.  Returns 0.
    """
    folder = str(folder)
    sample = str(sample)
    data_dict = datahandling.load_sample_component(sample)
    data_dict.setdefault("summary", {})
    data_dict.setdefault("results", {})
    extractions = (
        ("log/setup__filter_reads_with_bbduk.err.log", extract_bbuk_log),
        ("bracken.txt", extract_bracken_txt),
        ("kraken_report_bracken.txt", extract_kraken_report_bracken_txt),
        ("kraken_report.txt", extract_kraken_report_txt),
    )
    for relative_path, extractor in extractions:
        data_dict = datahandling.datadump_template(
            data_dict, folder, relative_path, extractor)
    # Derive the combined summary from the individual extractions above.
    data_dict = combine_bbduk_log_bracken_txt_kraken_report_bracken_txt(
        data_dict)
    datahandling.save_sample_component(data_dict, sample)
    return 0
def script__datadump_ariba_mlst(folder, sample, sample_yaml):
    """Collect ariba MLST reports into the sample component.

    Records which MLST database was used, extracts both TSV reports, and
    flattens the first ``mlst_report`` row into a ``key:value`` summary
    string.  Returns 0.
    """
    folder = str(folder)
    sample = str(sample)
    datadump_dict = datahandling.load_sample_component(sample)
    datadump_dict.setdefault("summary", {})
    datadump_dict.setdefault("results", {})
    mlst_database = datahandling.get_mlst_species_DB(sample_yaml)
    datadump_dict["results"]["mlst_db"] = mlst_database
    datadump_dict["summary"]["mlst_db"] = mlst_database
    for report in ("ariba_mlst/report.tsv", "ariba_mlst/mlst_report.tsv"):
        datadump_dict = extract_tsv(datadump_dict, folder, report)
    # Summary: join the first report row as "key:value" pairs; a missing
    # report leaves a diagnostic string instead.
    try:
        first_row = datadump_dict["results"][
            "ariba_mlst/mlst_report_tsv"]["values"][0]
        datadump_dict["summary"]["mlst_report"] = ",".join(
            "{}:{}".format(key, val) for key, val in first_row.items())
    except KeyError as e:
        datadump_dict["summary"]["mlst_report"] = "KeyError: {}".format(e)
    datahandling.save_sample_component(datadump_dict, sample)
    return 0
def script__datadump(folder, sample, sample_file_name, component_file_name):
    """Aggregate cge_mlst results per configured MLST database entry.

    For every MLST database mapped to the sample's species, loads the
    ``cge_mlst/<entry>/data.json`` output, records it under results, and
    builds parallel summary lists of database names, sequence types, and
    comma-joined allele names.  The summary is also written onto the
    sample's ``properties.mlst`` before both documents are saved.
    Returns 0.
    """
    db_sample = datahandling.load_sample(sample_file_name)
    db_component = datahandling.load_component(component_file_name)
    folder = str(folder)
    sample = str(sample)
    datadump_dict = datahandling.load_sample_component(sample)
    datadump_dict["summary"] = datadump_dict.get("summary", {})
    datadump_dict["results"] = datadump_dict.get("results", {})
    species = db_sample["properties"]["species"]
    datadump_dict["summary"]["db"] = []
    datadump_dict["summary"]["strain"] = []
    datadump_dict["summary"]["alleles"] = []
    # NOTE(review): utcnow() produces a naive datetime and is deprecated in
    # recent Python; kept for stored-document compatibility — confirm before
    # migrating to timezone-aware timestamps.
    datadump_dict["summary"]["component"] = {
        "id": db_component["_id"],
        "date": datetime.datetime.utcnow(),
    }
    mlst_species = db_component["mlst_species_mapping"][species]
    for mlst_entry in mlst_species:
        mlst_entry_db = datahandling.load_yaml(
            "cge_mlst/" + mlst_entry + "/data.json")
        datadump_dict["results"][mlst_entry] = mlst_entry_db
        datadump_dict["summary"]["db"].append(mlst_entry)
        mlst_results = mlst_entry_db["mlst"]["results"]
        datadump_dict["summary"]["strain"].append(
            mlst_results.get("sequence_type", "NA"))
        # Join every locus' allele name in profile order.  (FIX: the original
        # iterated a redundant copy of the profile's keys —
        # ``for i in [i for i in profile]`` — which is equivalent to
        # iterating the dict directly.)
        allele_profile = mlst_results["allele_profile"]
        datadump_dict["summary"]["alleles"].append(",".join(
            allele_profile[locus]["allele_name"] for locus in allele_profile))
    db_sample["properties"]["mlst"] = datadump_dict["summary"]
    datahandling.save_sample_component(datadump_dict, sample)
    datahandling.save_sample(db_sample, sample_file_name)
    return 0
def script__datadump_analyzer(folder, sample):
    """Collect plasmidfinder analyzer reports into the sample component.

    Extracts both TSV reports, copies the abricate-on-plasmidfinder values
    into the ``ariba_plasmidfinder`` summary (or a diagnostic string when
    missing), and saves the component.  Returns 0.
    """
    folder = str(folder)
    sample = str(sample)
    datadump_dict = datahandling.load_sample_component(sample)
    datadump_dict.setdefault("summary", {})
    datadump_dict.setdefault("results", {})
    for report in ("abricate_on_plasmidfinder_from_ariba.tsv",
                   "ariba_plasmidfinder/report.tsv"):
        datadump_dict = extract_tsv(datadump_dict, folder, report)
    # Summary: surface the abricate values, or note the missing key.
    try:
        values = datadump_dict["results"][
            "abricate_on_plasmidfinder_from_ariba_tsv"]["values"]
        datadump_dict["summary"]["ariba_plasmidfinder"] = values
    except KeyError as e:
        datadump_dict["summary"][
            "ariba_plasmidfinder"] = "KeyError: {}".format(e)
    datahandling.save_sample_component(datadump_dict, sample)
    return 0
def set_status_to_running(sample_component):
    """Mark the given sample component's database entry as "Running".

    Returns 0.
    """
    component_key = str(sample_component)
    entry = datahandling.load_sample_component(component_key)
    entry["status"] = "Running"
    datahandling.save_sample_component(entry, component_key)
    return 0
def script__datadump_kma_pointmutations(folder, sample):
    """Extract the contigs blastn results TSV into the sample component.

    Returns 0.
    """
    folder = str(folder)
    sample = str(sample)
    component_data = datahandling.load_sample_component(str(sample))
    component_data.setdefault("summary", {})
    component_data.setdefault("results", {})
    component_data = extract_tsv(
        component_data, folder, "contigs_blastn_results.tsv")
    datahandling.save_sample_component(component_data, sample)
    return 0
def script__datadump_min_read_check(folder, sample):
    """Extract the bbduk filter log into the sample component.

    Returns 0.
    """
    folder = str(folder)
    sample = str(sample)
    component_data = datahandling.load_sample_component(sample)
    component_data.setdefault("summary", {})
    component_data.setdefault("results", {})
    component_data = datahandling.datadump_template(
        component_data, folder,
        "log/setup__filter_reads_with_bbduk.err.log", extract_bbuk_log)
    datahandling.save_sample_component(component_data, sample)
    return 0
def script__initialization(requirements_file, sample, sample_component,
                           output_file, log_out, log_err):
    """Gate a component run on its requirements being met.

    Marks the component "Running", then either writes the sentinel
    ``output_file`` (requirements met) or records "Requirements not met"
    on both the error log and the component's status.  Returns 0 either way.
    """
    set_status_to_running(sample_component)
    # FIX: dropped the "== True" comparison (requirements_met is used as a
    # boolean) and a dead `pass` statement that followed the `with` block.
    if requirements_met(requirements_file, sample, log_out, log_err):
        datahandling.log(log_out, "{}\n{}\n".format(os.getcwd(), output_file))
        with open(str(output_file), "w") as handle:
            handle.write("Requirements met")
    else:
        datahandling.log(log_err, "Requirements not met")
        sample_component_entry = datahandling.load_sample_component(
            sample_component)
        sample_component_entry["status"] = "Requirements not met"
        datahandling.save_sample_component(sample_component_entry,
                                           sample_component)
    return 0
def script__datadump_analyzer(analyzer_folder, sample):
    """Collect MLST, plasmidfinder, and resfinder analyzer reports.

    Extracts all six TSV reports, then summarizes the abricate-on-resfinder
    values under ``ariba_resfinder`` and the first MLST report row under
    ``mlst_report`` (diagnostic strings when a report is missing).
    Returns 0.
    """
    analyzer_folder = str(analyzer_folder)
    sample = str(sample)
    datadump_dict = datahandling.load_sample_component(sample)
    datadump_dict["summary"] = datadump_dict.get("summary", {})
    datadump_dict["results"] = datadump_dict.get("results", {})
    for report in ("ariba_mlst/report.tsv",
                   "ariba_mlst/mlst_report.tsv",
                   "abricate_on_plasmidfinder_from_ariba.tsv",
                   "ariba_plasmidfinder/report.tsv",
                   "ariba_resfinder/report.tsv",
                   "abricate_on_resfinder_from_ariba.tsv"):
        datadump_dict = extract_tsv(datadump_dict, analyzer_folder, report)
    ## Summary:
    try:
        # BUGFIX: the "ariba_resfinder" summary previously copied from the
        # *plasmidfinder* results key ("abricate_on_plasmidfinder_from_
        # ariba_tsv") — a copy/paste slip; it now reads the resfinder
        # results extracted above.
        datadump_dict["summary"]["ariba_resfinder"] = datadump_dict[
            "results"]["abricate_on_resfinder_from_ariba_tsv"]["values"]
    except KeyError as e:
        datadump_dict["summary"]["ariba_resfinder"] = "KeyError: {}".format(e)
    try:
        datadump_dict["summary"]["mlst_report"] = ",".join([
            "{}:{}".format(key, val)
            for key, val in datadump_dict["results"]
            ["ariba_mlst/mlst_report_tsv"]["values"][0].items()
        ])
    except KeyError as e:
        datadump_dict["summary"]["mlst_report"] = "KeyError: {}".format(e)
    datahandling.save_sample_component(datadump_dict, sample)
    return 0
def script__datadump_whats_my_species(folder, sample):
    """Extract bracken/kraken outputs and derive species statistics.

    Runs the bracken and kraken report extractors in order, applies
    ``species_math`` to the accumulated data, and saves the component.
    Returns 0.
    """
    folder = str(folder)
    sample = str(sample)
    data_dict = datahandling.load_sample_component(sample)
    data_dict.setdefault("summary", {})
    data_dict.setdefault("results", {})
    extractions = (
        ("bracken.txt", extract_bracken_txt),
        ("kraken_report_bracken.txt", extract_kraken_report_bracken_txt),
        ("kraken_report.txt", extract_kraken_report_txt),
    )
    for relative_path, extractor in extractions:
        data_dict = datahandling.datadump_template(
            data_dict, folder, relative_path, extractor)
    data_dict = species_math(data_dict)
    datahandling.save_sample_component(data_dict, sample)
    return 0
test_samples = extract_tsv(samples_file) samples = [] for i in range(len(test_samples)): samples.append(test_samples[i]['sample']) output = [] for sample in samples: temp = [] genes = [] bs = [] bgs = [] file = sample + "/" + sample + "__pointfinder.yaml" file2 = sample + "/" + sample + "__ariba_resfinder.yaml" data = datahandling.load_sample_component(file) data2 = datahandling.load_sample_component(file2) matchinfo_short = "" row = pd.DataFrame(0, index=range(1), columns=range(96)) row.columns = template.columns row['isolate'] = sample if 'summary' in data2: for n in range(len(data2['summary']['ariba_resfinder'])): if 'DATABASE' in data2['summary']['ariba_resfinder'][n]: temp.append( data2['summary']['ariba_resfinder'][n]['DATABASE'][13:]) if 'GENE' in data2['summary']['ariba_resfinder'][n]: genes.append(