示例#1
0
def script__test_ssi_stamper(sample, sample_yaml, sample_component):
    """Run the SSI stamp test for a sample and persist the outcome.

    The summary and results returned by ``stamps.ssi_stamp.test`` are written
    to the sample component, and the stamp is appended to the sample's
    ``"stamps"`` list before the sample is saved to ``sample_yaml``.

    Always returns 0.
    """
    components = get_components(sample)

    # Species is looked up with ``None`` when nothing has been detected.
    properties = sample["properties"]
    detected = (properties["detected_species"]
                if "detected_species" in properties else None)
    species = datahandling.load_species(detected)

    outcome = stamps.ssi_stamp.test(
        components['whats_my_species'],
        components['qcquickie'],
        components['assemblatron'],
        species,
        sample,
    )
    results, summary, stamp = outcome

    component_entry = datahandling.load_sample_component(sample_component)
    component_entry["summary"] = summary
    component_entry["results"] = results
    datahandling.save_sample_component(component_entry, sample_component)

    # Reload the component after saving so its "_id" can be put on the stamp.
    saved_entry = datahandling.load_sample_component(sample_component)
    stamp["_sample_component"] = saved_entry["_id"]

    sample.setdefault("stamps", []).append(stamp)
    datahandling.save_sample(sample, sample_yaml)
    return 0
示例#2
0
def script__test_ssi_stamper(sample, sample_yaml, sample_component, log_err):
    """Run the SSI stamp test for a sample and record the outcome.

    The summary and results returned by ``stamps.ssi_stamp.test`` are saved on
    the sample component. The stamp is appended to
    ``sample["stamps"]["stamp_list"]`` and also stored under
    ``sample["stamps"][stamp["name"]]`` before the sample is saved.

    Args:
        sample: Sample entry; ``sample["properties"]`` and
            ``sample.get("stamps", {})`` are read from it.
        sample_yaml: Target handed to ``datahandling.save_sample``.
        sample_component: Handle used to load and save the sample component.
        log_err: Log target handed to ``datahandling.log`` on failure.

    Returns:
        0 on success.

    Raises:
        SystemExit: With status 1, after the traceback has been logged, when
            any step raises an ``Exception``.
    """
    # Local import: ``sys.exit`` replaces the site-provided ``exit`` helper,
    # which is intended for interactive use and is not always defined.
    import sys

    # Generic error handling to redirect output to the stderr log file
    try:
        comps = get_components(sample)
        if "detected_species" in sample["properties"]:
            species = datahandling.load_species(
                sample["properties"]["detected_species"])
        else:
            species = datahandling.load_species(None)

        results, summary, stamp = stamps.ssi_stamp.test(
            comps['whats_my_species'], comps['assemblatron'], species, sample)

        datadump_dict = datahandling.load_sample_component(sample_component)
        datadump_dict["summary"] = summary
        datadump_dict["results"] = results
        datahandling.save_sample_component(datadump_dict, sample_component)

        # Reload after saving to get the component's "_id" back
        datadump_dict = datahandling.load_sample_component(sample_component)
        stamp["_sample_component"] = datadump_dict["_id"]

        # Keep the full history in "stamp_list" and the latest stamp per name
        stamp_dict = sample.get("stamps", {})
        stamp_list = stamp_dict.get("stamp_list", [])
        stamp_list.append(stamp)
        stamp_dict["stamp_list"] = stamp_list
        stamp_dict[stamp["name"]] = stamp
        sample["stamps"] = stamp_dict

        datahandling.save_sample(sample, sample_yaml)
        return 0
    except Exception:
        datahandling.log(log_err, str(traceback.format_exc()))
        sys.exit(1)
示例#3
0
def script__datadump_assemblatron(folder, sample):
    """Dump the assemblatron output files into the sample component.

    Each file name below is handed, together with its extractor, to
    ``datahandling.datadump_template`` in the listed order; the resulting
    dictionary is then saved back to the sample component.

    Always returns 0.
    """
    folder = str(folder)
    sample = str(sample)

    data_dict = datahandling.load_sample_component(sample)
    for section in ("summary", "results"):
        data_dict[section] = data_dict.get(section, {})

    # (relative file name, extractor) pairs, processed in this order.
    extraction_plan = (
        ("contigs.sum.cov", extract_contigs_sum_cov),
        ("contigs.bin.cov", extract_contigs_bin_cov),
        ("log/setup__filter_reads_with_bbduk.err.log", extract_bbuk_log),
        ("quast/report.tsv", extract_quast_report),
        ("contigs.variants", extract_contig_variants),
        ("contigs.stats", extract_contig_stats),
        ("contigs.sketch", extract_contig_sketch),
    )
    for file_name, extractor in extraction_plan:
        data_dict = datahandling.datadump_template(
            data_dict, folder, file_name, extractor)

    datahandling.save_sample_component(data_dict, sample)
    return 0
示例#4
0
def script__datadump_whats_my_species(folder, sample):
    """Dump the whats_my_species output files into the sample component.

    The bbduk log, bracken and kraken report files are passed one by one to
    ``datahandling.datadump_template``; the combined dictionary is then run
    through ``combine_bbduk_log_bracken_txt_kraken_report_bracken_txt`` and
    saved back to the sample component.

    Always returns 0.
    """
    folder = str(folder)
    sample = str(sample)

    data_dict = datahandling.load_sample_component(sample)
    for section in ("summary", "results"):
        data_dict[section] = data_dict.get(section, {})

    # (relative file name, extractor) pairs, processed in this order.
    extraction_plan = (
        ("log/setup__filter_reads_with_bbduk.err.log", extract_bbuk_log),
        ("bracken.txt", extract_bracken_txt),
        ("kraken_report_bracken.txt", extract_kraken_report_bracken_txt),
        ("kraken_report.txt", extract_kraken_report_txt),
    )
    for file_name, extractor in extraction_plan:
        data_dict = datahandling.datadump_template(
            data_dict, folder, file_name, extractor)

    data_dict = combine_bbduk_log_bracken_txt_kraken_report_bracken_txt(
        data_dict)

    datahandling.save_sample_component(data_dict, sample)
    return 0
示例#5
0
def script__datadump_ariba_mlst(folder, sample, sample_yaml):
    """Dump the ariba MLST reports into the sample component.

    The MLST database returned by ``datahandling.get_mlst_species_DB`` is
    stored under ``"mlst_db"`` in both results and summary. The two ariba
    MLST TSV reports are loaded through ``extract_tsv``, and the first row of
    the MLST report is flattened into ``summary["mlst_report"]`` as
    comma-separated ``key:value`` pairs.

    Args:
        folder: Folder holding the ariba output; converted to ``str``.
        sample: Sample component handle; converted to ``str``.
        sample_yaml: Passed to ``datahandling.get_mlst_species_DB``.

    Returns:
        0 in all cases. A missing key or an empty report is written into the
        summary as an error string instead of being raised.
    """
    folder = str(folder)
    sample = str(sample)

    datadump_dict = datahandling.load_sample_component(sample)
    datadump_dict["summary"] = datadump_dict.get("summary", {})
    datadump_dict["results"] = datadump_dict.get("results", {})
    mlst_database = datahandling.get_mlst_species_DB(sample_yaml)
    datadump_dict["results"]["mlst_db"] = mlst_database
    datadump_dict["summary"]["mlst_db"] = mlst_database

    datadump_dict = extract_tsv(datadump_dict, folder, "ariba_mlst/report.tsv")

    datadump_dict = extract_tsv(datadump_dict, folder,
                                "ariba_mlst/mlst_report.tsv")

    # Summary:
    try:
        first_row = datadump_dict["results"]["ariba_mlst/mlst_report_tsv"][
            "values"][0]
        datadump_dict["summary"]["mlst_report"] = ",".join(
            "{}:{}".format(key, val) for key, val in first_row.items())
    except KeyError as e:
        datadump_dict["summary"]["mlst_report"] = "KeyError: {}".format(e)
    except IndexError as e:
        # A report without any rows has no first row to summarise; record it
        # the same best-effort way as a missing key instead of crashing.
        datadump_dict["summary"]["mlst_report"] = "IndexError: {}".format(e)

    datahandling.save_sample_component(datadump_dict, sample)

    return 0
示例#6
0
def script__datadump(folder, sample, sample_file_name, component_file_name):
    """Dump the cge_mlst results for a sample and copy the summary to it.

    For every MLST entry mapped to the sample's species in the component's
    ``"mlst_species_mapping"``, the matching ``cge_mlst/<entry>/data.json``
    is loaded, stored in the results, and its sequence type and allele names
    are appended to the summary. The summary is also stored on the sample
    under ``properties["mlst"]``.

    Always returns 0.
    """
    db_sample = datahandling.load_sample(sample_file_name)
    db_component = datahandling.load_component(component_file_name)

    folder = str(folder)
    sample = str(sample)

    datadump_dict = datahandling.load_sample_component(sample)
    summary = datadump_dict["summary"] = datadump_dict.get("summary", {})
    results = datadump_dict["results"] = datadump_dict.get("results", {})

    species = db_sample["properties"]["species"]

    summary["db"] = []
    summary["strain"] = []
    summary["alleles"] = []
    summary["component"] = {
        "id": db_component["_id"],
        "date": datetime.datetime.utcnow(),
    }

    for mlst_entry in db_component["mlst_species_mapping"][species]:
        entry_data = datahandling.load_yaml(
            "cge_mlst/" + mlst_entry + "/data.json")
        results[mlst_entry] = entry_data
        summary["db"].append(mlst_entry)

        mlst_results = entry_data["mlst"]["results"]
        summary["strain"].append(mlst_results.get("sequence_type", "NA"))

        # One comma-separated string of allele names per MLST entry.
        profile = mlst_results["allele_profile"]
        allele_names = [profile[locus]["allele_name"] for locus in profile]
        summary["alleles"].append(",".join(allele_names))

    db_sample["properties"]["mlst"] = datadump_dict["summary"]
    datahandling.save_sample_component(datadump_dict, sample)
    datahandling.save_sample(db_sample, sample_file_name)

    return 0
示例#7
0
def script__datadump_analyzer(folder, sample):
    """Dump the plasmidfinder reports into the sample component.

    Both TSV reports are loaded through ``extract_tsv``. The values of the
    abricate-on-plasmidfinder table become ``summary["ariba_plasmidfinder"]``;
    if a key is missing, the summary holds a ``"KeyError: ..."`` string
    instead.

    Always returns 0.
    """
    folder = str(folder)
    sample = str(sample)

    datadump_dict = datahandling.load_sample_component(sample)
    for section in ("summary", "results"):
        datadump_dict[section] = datadump_dict.get(section, {})

    reports = (
        "abricate_on_plasmidfinder_from_ariba.tsv",
        "ariba_plasmidfinder/report.tsv",
    )
    for report in reports:
        datadump_dict = extract_tsv(datadump_dict, folder, report)

    # Summary: fall back to an error string when the table is not present.
    try:
        plasmid_summary = datadump_dict["results"][
            "abricate_on_plasmidfinder_from_ariba_tsv"]["values"]
    except KeyError as e:
        plasmid_summary = "KeyError: {}".format(e)
    datadump_dict["summary"]["ariba_plasmidfinder"] = plasmid_summary

    datahandling.save_sample_component(datadump_dict, sample)
    return 0
示例#8
0
def set_status_to_running(sample_component):
    """Set the ``"status"`` of the given sample component to ``"Running"``.

    The component is loaded, updated and saved back through ``datahandling``.
    Always returns 0.
    """
    component_path = str(sample_component)
    entry = datahandling.load_sample_component(component_path)
    entry["status"] = "Running"
    datahandling.save_sample_component(entry, component_path)
    return 0
示例#9
0
def script__datadump_kma_pointmutations(folder, sample):
    """Dump ``contigs_blastn_results.tsv`` into the sample component.

    The TSV is loaded through ``extract_tsv`` and the updated dictionary is
    saved back to the sample component. Always returns 0.
    """
    folder = str(folder)
    sample = str(sample)

    component_data = datahandling.load_sample_component(sample)
    for section in ("summary", "results"):
        component_data[section] = component_data.get(section, {})

    component_data = extract_tsv(
        component_data, folder, "contigs_blastn_results.tsv")

    datahandling.save_sample_component(component_data, sample)
    return 0
示例#10
0
def script__datadump_min_read_check(folder, sample):
    """Dump the bbduk filter log into the sample component.

    The log file is passed with ``extract_bbuk_log`` to
    ``datahandling.datadump_template`` and the resulting dictionary is saved
    back to the sample component. Always returns 0.
    """
    folder = str(folder)
    sample = str(sample)

    component_data = datahandling.load_sample_component(sample)
    for section in ("summary", "results"):
        component_data[section] = component_data.get(section, {})

    bbduk_log = "log/setup__filter_reads_with_bbduk.err.log"
    component_data = datahandling.datadump_template(
        component_data, folder, bbduk_log, extract_bbuk_log)

    datahandling.save_sample_component(component_data, sample)
    return 0
def script__initialization(requirements_file, sample, sample_component,
                           output_file, log_out, log_err):
    """Mark the component as running and check its requirements.

    When ``requirements_met`` reports success, the working directory and
    output file are logged and ``"Requirements met"`` is written to
    ``output_file``. Otherwise the failure is logged to ``log_err`` and the
    component status is set to ``"Requirements not met"``.

    Always returns 0.
    """
    set_status_to_running(sample_component)

    # The explicit comparison is kept on purpose: only a result equal to
    # ``True`` counts as success.
    requirements_ok = (
        requirements_met(requirements_file, sample, log_out, log_err) == True  # noqa: E712
    )

    if not requirements_ok:
        datahandling.log(log_err, "Requirements not met")
        entry = datahandling.load_sample_component(sample_component)
        entry["status"] = "Requirements not met"
        datahandling.save_sample_component(entry, sample_component)
        return 0

    datahandling.log(log_out, "{}\n{}\n".format(os.getcwd(), output_file))
    with open(str(output_file), "w") as handle:
        handle.write("Requirements met")
    return 0
示例#12
0
def script__datadump_analyzer(analyzer_folder, sample):
    """Dump the analyzer TSV reports into the sample component.

    The ariba MLST, plasmidfinder and resfinder reports are loaded through
    ``extract_tsv``. The summary receives the values of the
    abricate-on-resfinder table under ``"ariba_resfinder"`` and the first row
    of the MLST report, flattened to comma-separated ``key:value`` pairs,
    under ``"mlst_report"``.

    Args:
        analyzer_folder: Folder holding the analyzer output; converted to
            ``str``.
        sample: Sample component handle; converted to ``str``.

    Returns:
        0 in all cases. A missing key or an empty MLST report is written into
        the summary as an error string instead of being raised.
    """
    analyzer_folder = str(analyzer_folder)
    sample = str(sample)

    datadump_dict = datahandling.load_sample_component(sample)
    datadump_dict["summary"] = datadump_dict.get("summary", {})
    datadump_dict["results"] = datadump_dict.get("results", {})

    report_files = (
        "ariba_mlst/report.tsv",
        "ariba_mlst/mlst_report.tsv",
        "abricate_on_plasmidfinder_from_ariba.tsv",
        "ariba_plasmidfinder/report.tsv",
        "ariba_resfinder/report.tsv",
        "abricate_on_resfinder_from_ariba.tsv",
    )
    for report_file in report_files:
        datadump_dict = extract_tsv(datadump_dict, analyzer_folder,
                                    report_file)

    ## Summary:
    try:
        # The resfinder summary is taken from the resfinder abricate table;
        # it was previously read from the plasmidfinder table by mistake.
        datadump_dict["summary"]["ariba_resfinder"] = datadump_dict["results"][
            "abricate_on_resfinder_from_ariba_tsv"]["values"]
    except KeyError as e:
        datadump_dict["summary"]["ariba_resfinder"] = "KeyError: {}".format(e)
    try:
        first_row = datadump_dict["results"]["ariba_mlst/mlst_report_tsv"][
            "values"][0]
        datadump_dict["summary"]["mlst_report"] = ",".join(
            "{}:{}".format(key, val) for key, val in first_row.items())
    except KeyError as e:
        datadump_dict["summary"]["mlst_report"] = "KeyError: {}".format(e)
    except IndexError as e:
        # A report without any rows has no first row to summarise; record it
        # the same best-effort way as a missing key instead of crashing.
        datadump_dict["summary"]["mlst_report"] = "IndexError: {}".format(e)

    datahandling.save_sample_component(datadump_dict, sample)

    return 0
示例#13
0
def script__datadump_whats_my_species(folder, sample):
    """Dump the bracken and kraken reports into the sample component.

    Each report is passed with its extractor to
    ``datahandling.datadump_template``; the dictionary is then run through
    ``species_math`` and saved back to the sample component.

    Always returns 0.
    """
    folder = str(folder)
    sample = str(sample)

    data_dict = datahandling.load_sample_component(sample)
    for section in ("summary", "results"):
        data_dict[section] = data_dict.get(section, {})

    # (relative file name, extractor) pairs, processed in this order.
    extraction_plan = (
        ("bracken.txt", extract_bracken_txt),
        ("kraken_report_bracken.txt", extract_kraken_report_bracken_txt),
        ("kraken_report.txt", extract_kraken_report_txt),
    )
    for file_name, extractor in extraction_plan:
        data_dict = datahandling.datadump_template(
            data_dict, folder, file_name, extractor)

    data_dict = species_math(data_dict)

    datahandling.save_sample_component(data_dict, sample)
    return 0
示例#14
0
test_samples = extract_tsv(samples_file)

samples = []
for i in range(len(test_samples)):
    samples.append(test_samples[i]['sample'])

output = []
for sample in samples:
    temp = []
    genes = []
    bs = []
    bgs = []

    file = sample + "/" + sample + "__pointfinder.yaml"
    file2 = sample + "/" + sample + "__ariba_resfinder.yaml"
    data = datahandling.load_sample_component(file)
    data2 = datahandling.load_sample_component(file2)

    matchinfo_short = ""
    row = pd.DataFrame(0, index=range(1), columns=range(96))
    row.columns = template.columns

    row['isolate'] = sample

    if 'summary' in data2:
        for n in range(len(data2['summary']['ariba_resfinder'])):
            if 'DATABASE' in data2['summary']['ariba_resfinder'][n]:
                temp.append(
                    data2['summary']['ariba_resfinder'][n]['DATABASE'][13:])
            if 'GENE' in data2['summary']['ariba_resfinder'][n]:
                genes.append(