Example #1
import os
import random

from sklearn.model_selection import train_test_split

# `build_backbone` and `Similarity` are project-specific helpers; they are
# assumed to be defined or imported elsewhere in this package.


def main(experiment_name, phenotypes, data_directory, anchor_genes,
         num_replicates=1, percent=0.4, num_anchors=50, min_dangle_size=3,
         max_dangle_size=10, test_ratio=0.5):
    """Build anchored similarity matrices for each phenotype.

    For every replicate, the anchor genes are split into train/test groups,
    a backbone is built from the training anchors, and each phenotype's
    similarity matrix is thresholded, augmented with that backbone, and
    written to the experiment_name directory.
    """
    assert isinstance(phenotypes, list)
    # Draw int(num_anchors * test_ratio) dangle sizes (alphas), sampled
    # uniformly from [min_dangle_size, max_dangle_size).
    alphas = random.choices(range(min_dangle_size, max_dangle_size),
                            k=int(num_anchors * test_ratio))
    assert len(alphas) < len(anchor_genes)
    anchor_train_groups = []
    anchor_test_groups = []
    backbones = []
    # Create all backbones
    for rep_id in range(num_replicates):
        random.shuffle(anchor_genes)
        candidates = anchor_genes[:int(num_anchors)]
        genes_of_interest_train, genes_of_interest_test = train_test_split(
            candidates,
            shuffle=True,
            test_size=test_ratio)

        anchor_train_groups.append(genes_of_interest_train)
        anchor_test_groups.append(genes_of_interest_test)
        backbones.append(
            build_backbone(anchors=anchor_train_groups[rep_id], alphas=alphas,
                           weight=1, edge_percentage=percent))
    # Write train anchors to file (create the experiment directory if needed)
    os.makedirs(experiment_name, exist_ok=True)
    with open(os.path.join(experiment_name, 'train_anchors.csv'), 'w') as fout:
        for gene_group in anchor_train_groups:
            fout.write(','.join(gene_group))
            fout.write("\n")
    # Write test anchors to file
    with open(os.path.join(experiment_name, 'test_anchors.csv'), 'w') as fout:
        for gene_group in anchor_test_groups:
            fout.write(','.join(gene_group))
            fout.write("\n")
    # Create the similarity object for each phenotype/replicate and add the backbone
    for pheno in phenotypes:
        file_name = os.path.join(data_directory, "{}.csv".format(pheno))
        for rep_id in range(num_replicates):
            sim_file_name = "anchored_{}_{}.csv".format(pheno, rep_id)
            out_address = os.path.join(experiment_name, sim_file_name)
            similarity = Similarity(file_name,
                                    anchors=anchor_train_groups[rep_id],
                                    alphas=alphas, string_id=True)
            similarity.transform()
            similarity.apply_threshold(lower_cor=0.2, upper_cor=0.8,
                                       value=0)
            similarity.augment(backbones[rep_id])

            similarity.to_csv(out_address)
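
A minimal, hypothetical invocation of the function above (the directory names, phenotype labels, and gene identifiers below are placeholders, not values from the original project):

if __name__ == '__main__':
    # Placeholder gene identifiers; in practice these would presumably match
    # the gene names used in the per-phenotype CSV files under data_directory.
    genes = ['GENE_{}'.format(i) for i in range(200)]
    main(experiment_name='demo_experiment',          # output directory
         phenotypes=['phenotype_a', 'phenotype_b'],  # expects <name>.csv in data_directory
         data_directory='data',
         anchor_genes=genes,
         num_replicates=2)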