示例#1
0
def create_sample_dropout(sample_id, num_vars: int,
                          bicluster_list: List[SimBicluster],
                          epi_data: np.ndarray, num_clusters: int,
                          epi_error: float, prob_drop_out: float):
    """Simulate one sample whose measurements are subject to random drop-out.

    ``num_clusters`` biclusters are drawn at random from ``bicluster_list``.
    ``sample_id`` is added to the ``samples`` of each drawn bicluster, and the
    union of their ``vars`` is treated as the set of active variables.

    :param sample_id: identifier added to ``samples`` of every chosen bicluster
    :param num_vars: length of the generated vectors
    :param bicluster_list: candidate biclusters; each must expose ``vars``
        (iterable of variable indices) and ``samples`` (a set)
    :param epi_data: accumulator that is updated in place by adding a noisy
        0/1 membership row for this sample
    :param num_clusters: how many biclusters the sample is assigned to
    :param epi_error: probability that an entry of the membership row is
        flipped (active reported as 0, inactive reported as 1)
    :param prob_drop_out: probability that a measurement is replaced by 0
    :return: list of ``num_vars`` measurements after drop-out
    :raises ValueError: from ``random.sample`` when ``num_clusters`` is
        negative or larger than ``len(bicluster_list)``
    """
    chosen_clusters = random.sample(bicluster_list, num_clusters)
    chosen_vars = set()
    for cluster in chosen_clusters:
        chosen_vars.update(cluster.vars)
        cluster.samples.add(sample_id)

    # Active variables are drawn around 10, inactive ones around 0.
    sample_clean = [
        random.normalvariate(10, 1)
        if i in chosen_vars else random.normalvariate(0, 1)
        for i in range(num_vars)
    ]

    # Zero a value out with probability prob_drop_out. The previous condition
    # was inverted and kept values with that probability instead.
    sample = [
        0 if random.random() < prob_drop_out else sample_var
        for sample_var in sample_clean
    ]

    # Noisy membership indicator: each entry is wrong with probability
    # epi_error.
    epi_row = [
        np.random.binomial(1, 1 - epi_error)
        if i in chosen_vars else np.random.binomial(1, epi_error)
        for i in range(num_vars)
    ]
    # In-place add so the caller's array receives the update.
    epi_data += epi_row
    return sample
示例#2
0
def create_sample_easy(sample_id, num_vars: int,
                       bicluster_list: List[SimBicluster],
                       epi_data: np.ndarray, num_tfs: int, epi_error: float,
                       error_type: str):
    """Simulate the gene measurements of a single sample.

    ``num_tfs`` biclusters are picked at random; the sample is registered in
    each of them and the union of their genes is considered activated.

    :param sample_id: unique id of the sample, added to each picked
        bicluster's ``samples``
    :param num_vars: number of genes
    :param bicluster_list: list of bicluster objects to pick from
    :param epi_data: epigenetic gene vector; the noise-free 0/1 activation
        row of this sample is added to it in place
    :param num_tfs: number of biclusters the sample belongs to
    :param epi_error: with ``error_type == "mode"`` the probability that a
        gene's on/off state is flipped before measuring; otherwise the
        standard deviation of the measurements
    :param error_type: ``"mode"`` selects state-flip noise, any other value
        selects noise on the measurement itself
    :return: sample vector of gene measurements
    """
    picked = random.sample(bicluster_list, num_tfs)
    for bicluster in picked:
        bicluster.samples.add(sample_id)
    # Union of the genes that are activated in this sample.
    active_genes = set().union(*(bicluster.vars for bicluster in picked))
    is_active = [gene in active_genes for gene in range(num_vars)]

    if error_type == "mode":
        # Flip each gene's state with probability epi_error, then measure
        # around 3 (on) or 0 (off) with unit standard deviation.
        observed_state = [
            np.random.binomial(1, (1 - epi_error) if on else epi_error)
            for on in is_active
        ]
        centers = [3 if state == 1 else 0 for state in observed_state]
        spread = 1
    else:
        # True states are used; epi_error acts as the standard deviation.
        centers = [3 if on else 0 for on in is_active]
        spread = epi_error
    sample = [random.normalvariate(center, spread) for center in centers]

    # The epigenetic vector receives the exact activation indicator.
    epi_data += [1.0 if on else 0.0 for on in is_active]
    return sample