Пример #1
0
    print("Running on the node {}".format(os.uname()[1]))
except:
    pass

# Pause for a random 0-89 s before touching the queue file.
# NOTE(review): presumably this staggers workers that start simultaneously so
# they do not read the queue at the same moment -- confirm. Nothing below locks
# the file, so two workers could still claim the same table.
sleep(np.random.randint(90))
print("Polling the queue")

# Queue file holding the list of correlation tables that still await processing.
remote_queue = os.path.join(
    ProjectDescriber.DATA_DIR,
    "correlation_data",
    "group_datasets",
    "tables.txt",
)
correlation_tables = Utilities.load_list(remote_queue)
correlation_tables = Utilities.remove_empty_values(correlation_tables)

# Nothing left to do: leave with a success status.
if len(correlation_tables) == 0:
    print("Empty remote queue")
    sys.exit(0)

# Claim the head of the queue and write the remaining entries back to the file.
correlation_table = correlation_tables[0]
Utilities.dump_list(correlation_tables[1:], remote_queue)
print("Now processing: '{}'".format(correlation_table))

# The output directory is named after the table file: basename without extension.
table_basename = os.path.basename(correlation_table)
group_name = os.path.splitext(table_basename)[0]
out_dir = os.path.join(
    ProjectDescriber.DATA_DIR, "correlation_data", "group_results", group_name
)

# Load the table, then drop every row that has a missing value in any column
# (assuming load_tsv returns a pandas DataFrame -- TODO confirm).
correlation_df = load_tsv(correlation_table)
correlation_df = correlation_df.dropna(axis=0, how="any")

# Distinct column-name prefixes, i.e. the text before the first "@", sorted.
feature_groups = sorted(
    {column.split("@")[0] for column in correlation_df.columns}
)

if len(feature_groups) < 2:
    queue = list(combinations(correlation_df.columns, 2))
else:
    queue = list(
Пример #2
0
    ])
    feature_dfs[feature_name] = feature_df.query(
        "sample_source == '{}'".format(sample_source))

# Add the raw-data frames to the per-feature mapping.
feature_dfs.update(raw_data_dfs)

# Directory that receives the per-group correlation datasets.
correlation_dir = os.path.join(
    ProjectDescriber.DATA_DIR, "correlation_data", "group_datasets"
)

# Every unordered pair of feature names (a name may be paired with itself),
# crossed with every age group and every diagnosis group.
feature_pairs = combinations_with_replacement(feature_dfs.keys(), 2)
group_combinations = list(product(feature_pairs, AGE_GROUPS, DIAGNOSIS_GROUPS))

# Build one dataset file per (feature pair, age, diagnosis) combination and
# remember the path of every file written.
correlation_tables = []
for feature_pair, age, diagnosis in group_combinations:
    query = "age == '{}' and diagnosis == '{}'".format(age, diagnosis)

    # A self-pair such as (a, a) contributes its frame only once, hence set().
    member_names = sorted(set(feature_pair))
    frames = [
        select_data_columns(feature_dfs[name].query(query).reset_index())
        for name in member_names
    ]
    # NOTE(review): concat is presumably pandas.concat with its default axis
    # (frames stacked row-wise) -- confirm against the file's imports.
    correlation_df = concat(frames)

    table_name = "{}_for_{}_{}.tsv".format(
        " vs ".join(feature_pair), age, diagnosis
    )
    correlation_table = os.path.join(correlation_dir, table_name)
    dump_tsv(correlation_df, correlation_table)
    correlation_tables.append(correlation_table)

# Write the list of generated tables twice: "tables.txt" and a ".bak" copy.
# NOTE(review): presumably the copy lets the list be restored after workers
# have consumed "tables.txt" -- confirm.
for list_name in ("tables.txt", "tables.txt.bak"):
    Utilities.dump_list(
        correlation_tables, os.path.join(correlation_dir, list_name)
    )