Пример #1
0
# Threshold handed to `lv.get_generic_specific_genes`
# (presumably a percentile cutoff for calling a gene "generic" -- confirm
# against that helper)
generic_threshold = 60
dict_genes = lv.get_generic_specific_genes(data, generic_threshold)

# +
# Compare the gene ids indexing our data with the gene ids indexing the
# multiplier model
our_genes = list(data.index)
multiplier_genes = list(multiplier_model_z.index)
shared_genes = set(our_genes) & set(multiplier_genes)

# Report the number of our genes, then the number shared, one per line
print(len(our_genes), len(shared_genes), sep="\n")
# -

# Drop gene ids that are not used in the multiplier analysis
processed_dict_genes = lv.process_generic_specific_gene_lists(
    dict_genes,
    multiplier_model_z,
)

# Sanity check: the "generic" and "other" lists together must account for
# exactly the shared genes
assert (
    sum(len(processed_dict_genes[label]) for label in ("generic", "other"))
    == len(shared_genes)
)

# ## Get coverage of LVs
#
# For each gene (generic or other) we want to find:
# 1. The number of LVs in which the gene is present
# 2. The number of LVs to which the gene contributes strongly (i.e. the gene is highly weighted within that LV)

# ### Nonzero LV coverage

dict_nonzero_coverage = lv.get_nonzero_LV_coverage(
    processed_dict_genes,
    multiplier_model_z,
)
Пример #2
0
# Threshold handed to `lv.get_generic_specific_genes`
# (presumably a percentile cutoff for calling a gene "generic" -- confirm
# against that helper)
generic_threshold = 80
dict_genes = lv.get_generic_specific_genes(data, generic_threshold)

# +
# Compare the gene ids indexing our data with the gene ids indexing the
# eADAGE weight matrix
our_genes = list(data.index)
eADAGE_genes = list(eADAGE_weight.index)
shared_genes = set(our_genes) & set(eADAGE_genes)

# Report the number of our genes, then the number shared, one per line
print(len(our_genes), len(shared_genes), sep="\n")
# -

# Drop gene ids that are not used in the eADAGE analysis
processed_dict_genes = lv.process_generic_specific_gene_lists(
    dict_genes,
    eADAGE_weight,
)

# Sanity check: the "generic" and "other" lists together must account for
# exactly the shared genes
assert (
    sum(len(processed_dict_genes[label]) for label in ("generic", "other"))
    == len(shared_genes)
)

# ## Get coverage of LVs
#
# For each gene (generic or other) we want to find:
# 1. The number of LVs in which the gene is present
# 2. The number of LVs to which the gene contributes strongly (i.e. the gene is highly weighted within that LV)

# ### Nonzero LV coverage

dict_nonzero_coverage = lv.get_nonzero_LV_coverage(
    processed_dict_genes,
    eADAGE_weight,
)