Exemplo n.º 1
0
def parse_spades_version(
    sample_name_,
    log_dir="/data1/bio/projects/vradchenko/lactobacillus_salivarius/pga-pe/log",
):
    """Extract the image/version token from a sample's SPAdes log file.

    The first file under ``log_dir`` whose path ends with ``.log`` and
    contains both ``"spades"`` and ``sample_name_`` is read, and the first
    line starting with ``"Status: Image is up to date for "`` is located.
    The last space/tab-separated token of that line is returned.

    NOTE(review): the status line looks like container-pull output, so the
    token is presumably an image reference including its tag -- confirm.

    Args:
        sample_name_: Substring that must occur in the log file path.
        log_dir: Directory scanned for log files. Defaults to the project
            location that was previously hard-coded.

    Returns:
        The last whitespace-delimited token of the status line, as a string.

    Raises:
        IndexError: If no matching log file exists, or the log has no
            status line. (Same exception type as before, now with a
            message that says what was missing.)
    """
    log_file = next(
        (
            path for path in Utilities.scan_whole_dir(log_dir)
            if path.endswith(".log")
            and "spades" in path
            and sample_name_ in path
        ),
        None,
    )
    if log_file is None:
        raise IndexError(
            "No SPAdes log file found for sample '{}' in '{}'".format(
                sample_name_, log_dir
            )
        )

    status_prefix = "Status: Image is up to date for "
    image_version_line = next(
        (
            line.strip() for line in Utilities.load_list(log_file)
            if line.strip().startswith(status_prefix)
        ),
        None,
    )
    if image_version_line is None:
        raise IndexError(
            "No '{}' line found in '{}'".format(status_prefix.strip(), log_file)
        )

    # Split on runs of spaces/tabs only; the version is the trailing token.
    return re.split(r"[\t ]+", image_version_line)[-1]
Exemplo n.º 2
0
    d["is_correlation_valid"] = True
    return _process_out()


# Report the node name from os.uname(). This is purely informational, so an
# ordinary failure (e.g. os.uname() missing on the platform) is ignored.
# Catching Exception rather than using a bare except keeps
# KeyboardInterrupt and SystemExit from being silently swallowed.
try:
    print("Running on the node {}".format(os.uname()[1]))
except Exception:
    pass

# Pause for a random number of seconds; np.random.randint(90) yields an
# integer in [0, 90).
# NOTE(review): presumably this jitter spreads out concurrent workers that
# share the queue file. The read-then-rewrite below is not atomic -- confirm
# whether file locking is needed.
sleep(np.random.randint(90))
print("Polling the queue")

# The queue is a text file loaded as a list, with empty entries removed.
remote_queue = os.path.join(ProjectDescriber.DATA_DIR, "correlation_data",
                            "group_datasets", "tables.txt")
correlation_tables = Utilities.remove_empty_values(
    Utilities.load_list(remote_queue))
if len(correlation_tables) == 0:
    print("Empty remote queue")
    sys.exit(0)

# Take the first entry and write the remaining ones back before processing.
Utilities.dump_list(correlation_tables[1:], remote_queue)

correlation_table = correlation_tables[0]
print("Now processing: '{}'".format(correlation_table))

# Results go to a directory named after the table file (extension removed).
group_name = os.path.splitext(os.path.basename(correlation_table))[0]
out_dir = os.path.join(ProjectDescriber.DATA_DIR, "correlation_data",
                       "group_results", group_name)

# Drop every row that contains at least one missing value.
# NOTE(review): assumes load_tsv returns a pandas DataFrame -- confirm.
correlation_df = load_tsv(correlation_table).dropna(axis=0, how="any")
# The part of each column name before the first "@" is its feature group.
feature_groups = sorted(
    {column.split("@")[0] for column in correlation_df.columns})