Пример #1
0
def extract_kraken_report_bracken_txt(file_path, key, data_dict):
    """Record the second field of the report's first line as a count.

    The text returned by ``datahandling.read_buffer(file_path)`` is split on
    newlines.  When that yields more than one element, the second
    tab-separated field of the first line is converted to ``int`` and stored
    under ``data_dict["results"][key]["unclassified_count"]``.

    Args:
        file_path: Path handed to ``datahandling.read_buffer``.
        key: Entry of ``data_dict["results"]`` that receives the value.
        data_dict: Nested result dictionary; mutated in place.

    Returns:
        The same ``data_dict`` object.
    """
    report_lines = datahandling.read_buffer(file_path).split("\n")

    # A buffer without any newline produces a single element; nothing is
    # recorded in that case.
    if len(report_lines) <= 1:
        return data_dict

    first_line_fields = report_lines[0].split("\t")
    data_dict["results"][key]["unclassified_count"] = int(first_line_fields[1])
    return data_dict
Пример #2
0
def extract_contig_stats(file_path, key, data_dict):
    """Extract the average insert size from a stats file into ``data_dict``.

    The text returned by ``datahandling.read_buffer(file_path)`` is searched
    for the first ``insert size average: <number>`` entry.  The number is
    stored as a ``float`` under
    ``data_dict["results"][key]["insert_size_avg"]`` and mirrored to
    ``data_dict["summary"]["insert_size_avg"]``.

    Args:
        file_path: Path handed to ``datahandling.read_buffer``.
        key: Entry of ``data_dict["results"]`` that receives the value.
        data_dict: Dictionary with ``"results"`` and ``"summary"`` entries;
            mutated in place.

    Returns:
        The same ``data_dict`` object.

    Raises:
        AttributeError: If the pattern is not present in the file, because
            ``re.search`` then returns ``None``.
    """
    buffer = datahandling.read_buffer(file_path)

    # Raw string: "\s" and "\." are regex escapes, not Python string escapes.
    # In a plain literal they are invalid escape sequences and emit a warning
    # on modern Python versions.
    match = re.search(r"insert size average:\s*([0-9]+[\.]?[0-9]*)", buffer,
                      re.MULTILINE)
    insert_size_avg = float(match.group(1))

    data_dict["results"][key]["insert_size_avg"] = insert_size_avg
    data_dict["summary"]["insert_size_avg"] = insert_size_avg
    return data_dict
Пример #3
0
def extract_kraken_report_txt(file_path, key, data_dict):
    """Store a tab-separated report as a list of stripped field lists.

    The text returned by ``datahandling.read_buffer(file_path)`` is split on
    newlines; every resulting line (including empty ones) is split on tabs
    and each field is stripped of surrounding whitespace.  The list of rows
    is stored under ``data_dict["results"][key]["kraken_output"]``.

    Args:
        file_path: Path handed to ``datahandling.read_buffer``.
        key: Entry of ``data_dict["results"]`` that receives the rows.
        data_dict: Nested result dictionary; mutated in place.

    Returns:
        The same ``data_dict`` object.
    """
    report_lines = datahandling.read_buffer(file_path).split("\n")
    data_dict["results"][key]["kraken_output"] = [
        [field.strip() for field in line.split("\t")]
        for line in report_lines
    ]
    return data_dict
Пример #4
0
def extract_bbuk_log(file_path, key, data_dict):
    """Extract input/result read and base counts from a log file.

    The text returned by ``datahandling.read_buffer(file_path)`` is searched
    for the ``Input:`` and ``Result:`` entries.  Four integers are stored in
    ``data_dict["results"][key]``:

    * ``input_reads_num`` / ``input_reads_bases`` from the ``Input:`` entry
    * ``filtered_reads_num`` / ``filtered_reads_bases`` from ``Result:``

    ``filtered_reads_num`` is additionally mirrored to
    ``data_dict["summary"]["filtered_reads_num"]``.

    Args:
        file_path: Path handed to ``datahandling.read_buffer``.
        key: Entry of ``data_dict["results"]`` that receives the values.
        data_dict: Dictionary with ``"results"`` and ``"summary"`` entries;
            mutated in place.

    Returns:
        The same ``data_dict`` object.

    Raises:
        AttributeError: If any of the patterns is not present in the file,
            because ``re.search`` then returns ``None``.
    """
    buffer = datahandling.read_buffer(file_path)

    # Raw strings: "\s" is a regex escape, not a Python string escape; in a
    # plain literal it is an invalid escape sequence and emits a warning on
    # modern Python versions.  Order matches the original assignment order.
    patterns = (
        ("input_reads_num", r"Input:\s*([0-9]+)\sreads"),
        ("filtered_reads_num", r"Result:\s*([0-9]+)\sreads"),
        ("input_reads_bases", r"Input:.*?([0-9]+)\sbases"),
        ("filtered_reads_bases", r"Result:.*?([0-9]+)\sbases"),
    )
    for field_name, pattern in patterns:
        data_dict["results"][key][field_name] = int(
            re.search(pattern, buffer, re.MULTILINE).group(1))

    data_dict["summary"]["filtered_reads_num"] = data_dict["results"][key][
        "filtered_reads_num"]
    return data_dict
Пример #5
0
def extract_quast_report(file_path, key, data_dict):
    """Extract GC content and N50/N75/L50/L75 from a tab-separated report.

    The text returned by ``datahandling.read_buffer(file_path)`` is searched
    for ``<label><TAB><number>`` entries.  ``GC (%)`` is stored as a
    ``float`` under ``"GC"``; ``N50``, ``N75``, ``L50`` and ``L75`` are
    stored as ``int`` under their own names, all in
    ``data_dict["results"][key]``.  ``GC`` and ``N50`` are additionally
    mirrored to ``data_dict["summary"]``.

    Args:
        file_path: Path handed to ``datahandling.read_buffer``.
        key: Entry of ``data_dict["results"]`` that receives the values.
        data_dict: Dictionary with ``"results"`` and ``"summary"`` entries;
            mutated in place.

    Returns:
        The same ``data_dict`` object.

    Raises:
        AttributeError: If any of the labels is not present in the file,
            because ``re.search`` then returns ``None``.
    """
    buffer = datahandling.read_buffer(file_path)

    # Raw strings keep "\(", "\)", "\." and "\t" as regex escapes instead of
    # relying on Python passing invalid string escapes through unchanged
    # (which emits a warning on modern Python versions).
    data_dict["results"][key]["GC"] = float(
        re.search(r"GC \(%\)\t([0-9]+[\.]?[0-9]*)", buffer,
                  re.MULTILINE).group(1))

    # The integer metrics all share the "<label><TAB><digits>" layout.
    for metric in ("N50", "N75", "L50", "L75"):
        data_dict["results"][key][metric] = int(
            re.search(metric + r"\t([0-9]+)", buffer,
                      re.MULTILINE).group(1))

    data_dict["summary"]["GC"] = data_dict["results"][key]["GC"]
    data_dict["summary"]["N50"] = data_dict["results"][key]["N50"]
    return data_dict
Пример #6
0
def extract_bracken_txt(file_path, key, data_dict):
    """Extract per-species fields from a tab-separated table with a header.

    The text returned by ``datahandling.read_buffer(file_path)`` is split on
    newlines.  The first line is treated as a header and skipped.  For each
    following non-blank line at index ``i`` these entries are written to
    ``data_dict["results"][key]``:

    * ``species_<i>_name``: column 0 (string)
    * ``species_<i>_kraken_assigned_reads``: column 3 (kept as a string)
    * ``species_<i>_added_reads``: column 4 (kept as a string)
    * ``species_<i>_count``: column 5, stripped and converted to ``int``

    Unlike a fixed ``range(1, len(lines) - 1)`` loop, the last data row is
    not dropped when the file lacks a trailing newline, and blank lines
    (such as the empty element after a final newline) are skipped rather
    than indexed.

    Args:
        file_path: Path handed to ``datahandling.read_buffer``.
        key: Entry of ``data_dict["results"]`` that receives the values.
        data_dict: Nested result dictionary; mutated in place.

    Returns:
        The same ``data_dict`` object.

    Raises:
        IndexError: If a non-blank data line has fewer than six columns.
        ValueError: If column 5 of a data line is not an integer.
    """
    lines = datahandling.read_buffer(file_path).split("\n")

    # ``i`` stays the line index so key names are unchanged for files that
    # end with a single trailing newline.
    for i, line in enumerate(lines[1:], start=1):
        if not line.strip():
            continue
        fields = line.split("\t")
        prefix = "species_" + str(i)
        data_dict["results"][key][prefix + "_name"] = fields[0]
        data_dict["results"][key][
            prefix + "_kraken_assigned_reads"] = fields[3]
        data_dict["results"][key][prefix + "_added_reads"] = fields[4]
        data_dict["results"][key][prefix + "_count"] = int(fields[5].strip())
    return data_dict
Пример #7
0
def extract_contig_sketch(file_path, key, data_dict):
    """Store the file's lines as a list under ``data_dict["results"][key]``.

    The text returned by ``datahandling.read_buffer(file_path)`` is split on
    newlines and the resulting list replaces whatever was stored at
    ``data_dict["results"][key]``.

    Args:
        file_path: Path handed to ``datahandling.read_buffer``.
        key: Entry of ``data_dict["results"]`` to overwrite.
        data_dict: Nested result dictionary; mutated in place.

    Returns:
        The same ``data_dict`` object.
    """
    sketch_lines = datahandling.read_buffer(file_path).split("\n")
    data_dict["results"][key] = sketch_lines
    return data_dict