Пример #1
0
def generate_cluster(sequences, dpm_subset, idx, sampler_config):
    """Build a cluster_t from the counts found at the sites of dpm_subset.

    Every element of dpm_subset is indexed as (sequence index, start
    offset). Starting at that offset, sampler_config.tfbs_length
    consecutive columns of the selected sequence are read: entries 0-3 of
    each column are accumulated into counts and entry 4 into counts_gap.

    The value returned by get_baseline_prior() supplies the pseudo
    counts: its first four entries become alpha, its fifth alpha_gap.
    The number of elements in dpm_subset is passed on as the component
    count, idx as the cluster identifier.
    """
    motif_length = sampler_config.tfbs_length
    counts = [[0.0] * motif_length for _ in range(4)]
    counts_gap = [0.0 for _ in range(motif_length)]
    prior = get_baseline_prior(dpm_subset.dpm_subset_tag(), sampler_config)
    alpha, alpha_gap = prior[0:4], prior[4]
    components = len(dpm_subset)
    for site in dpm_subset:
        seq_index = site[0]
        start = site[1]
        # walk over the motif columns covered by this site
        for offset in range(motif_length):
            column = sequences[seq_index][start + offset]
            # entries 0..3: nucleotides, entry 4: gaps
            for row in range(4):
                counts[row][offset] += column[row]
            counts_gap[offset] += column[4]
    return cluster_t(counts, counts_gap, alpha, alpha_gap, components, idx,
                     dpm_subset.dpm_subset_tag())
Пример #2
0
def load_cluster(cluster_parser, sampler_config, cluster_name):
    """Reconstruct a cluster_t from the 'Cluster' section of a parser.

    Args:
        cluster_parser: parser object providing get() and has_option().
            The options '<cluster_name>_components' and
            '<cluster_name>_type' are read from section 'Cluster';
            '<cluster_name>_sites' is read when present.
        sampler_config: object whose baseline_tags and baseline_priors
            are parallel sequences (they are iterated in lockstep).
        cluster_name: option prefix. It must start with
            'cluster_<digits>'; the digits become the cluster identifier.

    Returns:
        A cluster_t built from the stored counts, the matching baseline
        prior, the component count, the identifier, the cluster type and
        the sites (None when no sites option exists).

    Raises:
        ValueError: if cluster_name does not start with 'cluster_<digits>'.
        IOError: if no baseline tag equals the cluster type.
    """
    result = re.match('cluster_([0-9]+)', cluster_name)
    if result is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("Invalid cluster name: %r" % (cluster_name,))
    identifier   = int(result.group(1))
    counts, counts_gap = read_counts(cluster_parser, cluster_name)
    components   = int(cluster_parser.get('Cluster', '%s_components' % cluster_name))
    cluster_type = cluster_parser.get('Cluster', '%s_type' % cluster_name)
    sites        = None
    sites_option = '%s_sites' % cluster_name
    if cluster_parser.has_option('Cluster', sites_option):
        sites_str = cluster_parser.get('Cluster', sites_option)
        sites     = dpm_subset_t(cluster_type)
        for elem in parse_partition_elements(sites_str):
            sites.insert(elem)
    alpha        = None
    alpha_gap    = None
    for (tag, prior) in zip(sampler_config.baseline_tags, sampler_config.baseline_priors):
        if tag == cluster_type:
            # Transpose the prior once. A list comprehension (rather than
            # subscripting map()) keeps this working where map() returns
            # an iterator. No break: the last matching tag wins, as before.
            transposed = [list(column) for column in zip(*prior)]
            alpha      = transposed[0:4]
            alpha_gap  = transposed[4]
    if alpha is None or alpha_gap is None:
        raise IOError("Baseline prior not found in sampler config.")
    return cluster_t(counts, counts_gap, alpha, alpha_gap, components, identifier, cluster_type, sites = sites)
Пример #3
0
def generate_cluster(sequences, dpm_subset, idx, sampler_config, index_error = True):
    """Build a cluster_t from the counts found at the ranges of dpm_subset.

    Each element r of dpm_subset provides index() (sequence index and
    start offset), length() and reverse(). For every covered column,
    entries 0-3 are accumulated into counts and entry 4 into counts_gap.
    When r.reverse() is true the column is stored at the mirrored motif
    position and nucleotide i is read from entry DNA.complement(i).

    Args:
        sequences: nested indexable, accessed as sequences[s][p][0..4].
        dpm_subset: iterable of ranges; also provides model_id() and len().
        idx: identifier passed on to cluster_t.
        sampler_config: passed on to get_baseline_prior().
        index_error: if True (default) an IndexError raised while reading
            a column propagates. If False a warning is written to
            sys.stderr and that column is skipped.

    Returns:
        A cluster_t holding the counts, the baseline pseudo counts
        expanded to the model length, the number of ranges, idx, the
        model id and dpm_subset itself as sites.

    Raises:
        IndexError: if a range reaches outside sequences and index_error
            is True.
    """
    length         = dpm_subset.model_id().length
    counts         = [ [ 0.0 for _ in range(length) ] for _ in range(4) ]
    counts_gap     = [ 0.0 ] * length
    baseline_prior = get_baseline_prior(dpm_subset.model_id(), sampler_config)
    alpha          = [ [ baseline_prior[i][0] for _ in range(length) ] for i in range(4) ]
    alpha_gap      = baseline_prior[4] * length
    components     = len(dpm_subset)
    for r in dpm_subset:
        index       = r.index()
        s           = index[0]
        p           = index[1]
        site_length = r.length()
        reverse     = r.reverse()
        # loop over the motif
        for j in range(site_length):
            # Fetch the column and its gap entry up front. The bounds check
            # then no longer depends on a loop variable that is not bound
            # yet, and it also covers the gap entry.
            try:
                column = sequences[s][p+j]
                gap    = column[4]
            except IndexError:
                if index_error:
                    raise
                sys.stderr.write("Warning: index %d:%d out of range!\n" % (s, p+j))
                continue
            if reverse:
                # Reverse strand: mirror the motif position and complement
                # the nucleotide.
                k = site_length-j-1
                for i in range(4):
                    counts[i][k] += column[DNA.complement(i)]
            else:
                k = j
                for i in range(4):
                    counts[i][k] += column[i]
            # Gap counts go to the same motif position as the nucleotide
            # counts, so they are mirrored on the reverse strand as well.
            counts_gap[k] += gap
    return cluster_t(counts, counts_gap, alpha, alpha_gap, components, idx, dpm_subset.model_id(), sites=dpm_subset)