import os
import random

from sklearn.model_selection import train_test_split

# NOTE: `Similarity` and `build_backbone` are project-local helpers; the
# import paths below are assumptions and may need adjusting to this package.
# from similarity import Similarity
# from backbone import build_backbone


def main(experiment_name, phenotypes, data_directory, anchor_genes,
         num_replicates=1, percent=0.4, num_anchors=50,
         min_dangle_size=3, max_dangle_size=10, test_ratio=0.5):
    assert isinstance(phenotypes, list)

    # Sample dangle sizes with replacement; note that range() excludes
    # max_dangle_size itself.
    alphas = random.choices(range(min_dangle_size, max_dangle_size),
                            k=int(num_anchors * test_ratio))
    # Sanity check: there must be more candidate anchor genes than
    # sampled dangle sizes.
    assert len(alphas) < len(anchor_genes)

    anchor_train_groups = []
    anchor_test_groups = []
    backbones = []

    # Ensure the experiment output directory exists before writing into it.
    os.makedirs(experiment_name, exist_ok=True)

    # Create one backbone per replicate, each built from a fresh random
    # train/test split of the candidate anchor genes.
    for rep_id in range(num_replicates):
        # Note: shuffles the caller's list in place.
        random.shuffle(anchor_genes)
        candidates = anchor_genes[:int(num_anchors)]
        genes_of_interest_train, genes_of_interest_test = train_test_split(
            candidates, shuffle=True, test_size=test_ratio)
        anchor_train_groups.append(genes_of_interest_train)
        anchor_test_groups.append(genes_of_interest_test)
        backbones.append(
            build_backbone(anchors=anchor_train_groups[rep_id],
                           alphas=alphas,
                           weight=1,
                           edge_percentage=percent))

    # Write the train anchors to file, one comma-separated row per replicate.
    with open(os.path.join(experiment_name, 'train_anchors.csv'), 'w') as fout:
        for gene_group in anchor_train_groups:
            fout.write(','.join(gene_group))
            fout.write("\n")

    # Write the test anchors to file, one comma-separated row per replicate.
    with open(os.path.join(experiment_name, 'test_anchors.csv'), 'w') as fout:
        for gene_group in anchor_test_groups:
            fout.write(','.join(gene_group))
            fout.write("\n")

    # Build one anchored similarity matrix per phenotype/replicate pair,
    # augment it with the replicate's backbone, and write it to disk.
    for pheno in phenotypes:
        file_name = os.path.join(data_directory, "{}.csv".format(pheno))
        for rep_id in range(num_replicates):
            sim_file_name = "anchored_{}_{}.csv".format(pheno, rep_id)
            out_address = os.path.join(experiment_name, sim_file_name)
            similarity = Similarity(file_name,
                                    anchors=anchor_train_groups[rep_id],
                                    alphas=alphas,
                                    string_id=True)
            similarity.transform()
            # Filter correlations against the 0.2 lower / 0.8 upper bounds,
            # replacing filtered entries with 0.
            similarity.apply_threshold(lower_cor=0.2, upper_cor=0.8, value=0)
            similarity.augment(backbones[rep_id])
            similarity.to_csv(out_address)
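
# A minimal sketch of how main() might be invoked. The experiment name,
# phenotype labels, data directory, and gene identifiers below are all
# hypothetical placeholders, and the phenotype CSVs (e.g. data/height.csv)
# are assumed to exist.
if __name__ == "__main__":
    example_anchor_genes = ["GENE{}".format(i) for i in range(200)]  # placeholder IDs
    main(experiment_name="experiment_01",
         phenotypes=["height", "bmi"],
         data_directory="data",
         anchor_genes=example_anchor_genes,
         num_replicates=3)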