out_folder=out_folder), "w") as handle:
    json.dump(config_file, handle, indent=2, sort_keys=True)

# The thread count is coerced to int before any later use.
threads = int(threads)

# Fixed analysis settings for this run.
# NOTE(review): seqid / covmode / cov look like sequence-clustering
# thresholds and boots like a bootstrap count -- confirm against the code
# that consumes them; their use is not visible in this section.
seqid, covmode, cov = 0.7, 1, 0.8
boots, min_genomes = 10, 10
remove_singleton_gcs = True
module_cutoff = 0.75

# Scratch layout: <config temp_folder>/binsets/<binset_name>/{gene_clusters,KEGGs}
temp_folder = pjoin(config_file['temp_folder'], "binsets", binset_name)
gc_folder = pjoin(temp_folder, "gene_clusters")
kegg_folder = pjoin(temp_folder, "KEGGs")

freetxt_line(f"Creating temp folder: {temp_folder}", logfile)

# Only the scratch root and the gene-cluster folder are created here;
# kegg_folder is just a path at this point.
for _folder in (temp_folder, gc_folder):
    os.makedirs(_folder, exist_ok=True)

title2log("copying bins to temp_folder", logfile)

# Every entry of <out_folder>/bins is treated as a per-bin directory holding
# <bin>.gff, <bin>.faa and <bin>.db; the three files are flattened into a
# single <temp_folder>/bins directory.
_bins_src = pjoin(out_folder, "bins")
_bins_dst = pjoin(temp_folder, "bins")
os.makedirs(_bins_dst, exist_ok=True)
for bin_ in tqdm(os.listdir(_bins_src)):
    for _ext in (".gff", ".faa", ".db"):
        _fname = bin_ + _ext
        shutil.copyfile(pjoin(_bins_src, bin_, _fname),
                        pjoin(_bins_dst, _fname))
Exemplo n.º 2
0
# Log file for this step: <out_folder>/logs/binset.log
logfile = pjoin(out_folder, 'logs', "binset.log")

# generate_config is defined elsewhere; whatever it returns replaces the
# incoming config_file value and is what gets serialized below.
config_file = generate_config(config_file)

# Provenance next to the log: the active conda environment (via the shell)
# and the configuration, dumped as sorted, indented JSON.
_logs_dir = f"{out_folder}/logs"
call(f"conda env export > {_logs_dir}/binset.yaml", shell=True)
with open(f"{_logs_dir}/binning_settings.json", "w") as handle:
    json.dump(config_file, handle, indent=2, sort_keys=True)

# Pull the per-binset settings out of the config in one pass.
# NOTE(review): these look like bin-quality filters; how they are applied is
# not visible in this section.
_binset_cfg = config_file['binsets'][binset_name]
(min_completeness,
 max_contamination,
 min_size,
 min_coding,
 keep_fails) = (_binset_cfg[_key] for _key in ('min_completeness',
                                               'max_contamination',
                                               'min_size',
                                               'min_coding',
                                               'keep_fails'))

# Scratch space for this binset: <config temp_folder>/binsets/<binset_name>
temp_folder = pjoin(config_file['temp_folder'], "binsets", binset_name)
freetxt_line(f"Creating temp folder: {temp_folder}", logfile)

os.makedirs(temp_folder, exist_ok=True)


# Where the bins of this binset come from, as listed in the config.
_sources = config_file['binsets'][binset_name]
# Each configured binning name maps to <root_folder>/binnings/<name>/bins.
binnings = [pjoin(root_folder, "binnings", _name, "bins")
            for _name in _sources['binnings']]
binsets = _sources['binsets']
external_bins = _sources['external_bins']

# Working folders inside the scratch directory. Only the clean_bins folder
# is created here; tbinfoder is just a path at this point.
tbinfoder = f"{temp_folder}/bins"
cbinfoder = f"{temp_folder}/clean_bins"
os.makedirs(cbinfoder, exist_ok=True)

stats = dict()
formating_dat = {
'out_folder' : out_folder,
'temp_folder' : temp_folder,
'threads' : threads,
Exemplo n.º 3
0
# Settings of this mapping, read from config_file['mappings'][mapping_name].
_mapping_cfg = config_file['mappings'][mapping_name]
taxfield = _mapping_cfg['taxfield']
precluster = _mapping_cfg['precluster']
keep_mapped = _mapping_cfg['keep_mapped']

alternate_root = _mapping_cfg['alternate_root']
ani = _mapping_cfg['min_nucleotide_id']
min_len = _mapping_cfg['min_len']

threads = int(threads)

# Suffix is "_mrna" when is_rna is truthy, otherwise empty.
mrna_flag = ""
if is_rna:
    mrna_flag = "_mrna"

# An empty/falsy alternate_root falls back to the binset name.
alternate_root = alternate_root or binset

# Scratch space for this mapping: <config temp_folder>/mappings/<mapping_name>
temp_folder = pjoin(config_file['temp_folder'], "mappings", mapping_name)
freetxt_line(f"Creating temp folder: {temp_folder}", logfile)

os.makedirs(temp_folder, exist_ok=True)

title2log("copying binset to temp_folder", logfile)

# The reference is <root_folder>/binsets/<binset>/<alternate_root>.fna and is
# always stored in the scratch folder under the fixed name binset.fna.
_binset_src = pjoin(root_folder, "binsets", binset, alternate_root + ".fna")
_binset_dst = pjoin(temp_folder, "binset.fna")
shutil.copy(_binset_src, _binset_dst)

# Thresholds handed to mmseqs when pre-clustering is enabled.
seqid, cov, covmode = 0.95, 0.9, 2
if precluster:
    # NOTE(review): the trailing "#> {logfile}" appears to be read by the
    # shell as a comment, so mmseqs output would not be redirected to the
    # log -- confirm whether that is intended.
    _cluster_cmd = (
        f"mmseqs easy-cluster --min-seq-id {seqid} --cov-mode {covmode} -c {cov} "
        f"--threads {threads} {temp_folder}/binset.fna {temp_folder}/binset "
        f"{temp_folder}/mmseqs_temp #> {logfile}"
    )
    call(_cluster_cmd, shell=True)
# Exactly five command-line arguments are expected after the script name;
# every value (including threads) is still a string at this point.
(script, lib_name, config_file,
 root_folder, out_folder, threads) = sys.argv

# Per-library log: <out_folder>/logs/<lib_name>.log
logfile = pjoin(out_folder, 'logs', f"{lib_name}.log")

# validate_description_json is defined elsewhere; its return value replaces
# the config_file argument and is what gets serialized below.
config_file = validate_description_json(config_file)

# Provenance: the active conda environment (via the shell) and the
# configuration, dumped as sorted, indented JSON.
call(f"conda env export > {out_folder}/logs/library_rrna_spliting.yaml",
     shell=True)
_settings_path = f"{out_folder}/logs/library_rrna_spliting_settings.json"
with open(_settings_path, "w") as handle:
    json.dump(config_file, handle, indent=2, sort_keys=True)

# Scratch space: <config temp_folder>/library_processing/<lib_name>
temp_folder = pjoin(config_file['temp_folder'], "library_processing", lib_name)
freetxt_line(f"Creating temp folder: {temp_folder}", logfile)

os.makedirs(temp_folder, exist_ok=True)

title2log(f"Ungzipping {lib_name}'s reads to temp_folder", logfile)

# One shell script with one unpigz line per read file (fwd, rev, unp),
# decompressing <out_folder>/<lib_name>_<kind>.fastq.gz to
# <temp_folder>/<kind>.fastq. All three lines run in a single shell call.
_ungzip_script = "\n" + "".join(
    f"unpigz -kc {out_folder}/{lib_name}_{_kind}.fastq.gz > {temp_folder}/{_kind}.fastq\n"
    for _kind in ("fwd", "rev", "unp"))
call(_ungzip_script, shell=True)

refs = "".join(
Exemplo n.º 5
0
     shell=True)
# Keep a copy of the configuration used for this run next to the logs.
_settings_path = f"{out_folder}/logs/library_processing_settings.json"
with open(_settings_path, "w") as handle:
    json.dump(config_file, handle, indent=2, sort_keys=True)

_lib_cfg = config_file['libraries'][lib_name]
rna = _lib_cfg["rna"]
# "sortmerna_refs" is a ';'-separated string; each entry becomes its own
# "--ref <entry>" argument and the arguments are joined with single spaces.
refs = " ".join(f"--ref {_ref}"
                for _ref in _lib_cfg["sortmerna_refs"].split(";"))

title2log("Starting processing library {}".format(lib_name), logfile)

# Scratch space: <config temp_folder>/library_processing/<lib_name>
temp_folder = pjoin(config_file['temp_folder'], "library_processing", lib_name)
freetxt_line("Creating temp folder: " + temp_folder, logfile)

os.makedirs(temp_folder, exist_ok=True)
# exist_ok=True so that re-running the step on an existing output folder does
# not abort with FileExistsError (consistent with the makedirs call above).
os.makedirs(pjoin(out_folder, "logs/fastp_logs/"), exist_ok=True)

# fastp command templates. The {temp}, {lib...}, {threads} and {log}
# placeholders are left unfilled here -- presumably completed with
# str.format by the QC code that follows; confirm against that code.
# Both templates discard the HTML report (-h /dev/null), write the JSON report
# into the temp folder and append stdout/stderr to {log}.
paired_fastp_line = "fastp -h /dev/null -j {temp}/{lib1}.json  --in1 {temp}/{lib1} --in2 {temp}/{lib2} --out1 {temp}/{lib1}_clean.fastq --out2 {temp}/{lib2}_clean.fastq --unpaired1 {temp}/{lib1}_unp.fastq --unpaired2 {temp}/{lib2}_unp.fastq  -w {threads}  >> {log} 2>&1"
single_fastp_line = "fastp -h /dev/null -j {temp}/{lib}.json  --in1 {temp}/{lib}  --out1 {temp}/{lib}_clean.fastq  -w {threads}  >> {log} 2>&1"

# Starts empty, with one dict for the 'paired' and one for the 'unpaired' case.
qc_log = {'paired': dict(), 'unpaired': dict()}

for fwd, rev in zip(config_file['libraries'][lib_name]["fwd"],
                    config_file['libraries'][lib_name]["rev"]):
    title2log(
        "QCing paired reads_library {} and {}, ".format(
            os.path.basename(fwd), os.path.basename(rev)), logfile)
    freetxt_line(
        "first copying reads to {temp_folder} ".format(