def generate_cluster(sequences, dpm_subset, idx, sampler_config):
    """Build a cluster_t from the sites contained in ``dpm_subset``.

    For every element of ``dpm_subset`` (indexed as ``(sequence, position)``)
    the ``sampler_config.tfbs_length`` columns starting at that position are
    accumulated: entries 0..3 of each column go into the nucleotide counts
    and entry 4 goes into the gap counts.

    Parameters:
        sequences: indexed as ``sequences[s][p][k]`` with ``k`` in 0..4.
        dpm_subset: iterable, sized collection of sites that also provides
            ``dpm_subset_tag()``.
        idx: identifier handed through to ``cluster_t``.
        sampler_config: provides ``tfbs_length`` and is passed on to
            ``get_baseline_prior``.

    Returns:
        The ``cluster_t`` constructed from the accumulated counts, the
        baseline prior (first four entries as alpha, fifth as alpha_gap),
        the number of sites, ``idx`` and the subset tag.
    """
    width = sampler_config.tfbs_length
    nucleotide_counts = [[0.0] * width for _ in range(4)]
    gap_counts = [0.0 for _ in range(width)]

    prior = get_baseline_prior(dpm_subset.dpm_subset_tag(), sampler_config)
    n_sites = len(dpm_subset)

    for site in dpm_subset:
        seq_index = site[0]
        start = site[1]
        # walk along the motif, one alignment column at a time
        for offset in range(width):
            column = sequences[seq_index][start + offset]
            # the four nucleotides first, then the gap entry
            for nucleotide, row in enumerate(nucleotide_counts):
                row[offset] += column[nucleotide]
            gap_counts[offset] += column[4]

    return cluster_t(
        nucleotide_counts,
        gap_counts,
        prior[0:4],
        prior[4],
        n_sites,
        idx,
        dpm_subset.dpm_subset_tag(),
    )
def load_cluster(cluster_parser, sampler_config, cluster_name):
    """Load the cluster called ``cluster_name`` from the 'Cluster' section.

    Parameters:
        cluster_parser: config parser offering ``get`` and ``has_option``;
            the options ``<name>_components`` and ``<name>_type`` are
            required, ``<name>_sites`` is optional.
        sampler_config: provides the parallel sequences ``baseline_tags``
            and ``baseline_priors``.
        cluster_name: name starting with ``cluster_<digits>``; the digits
            become the cluster identifier.

    Returns:
        A ``cluster_t`` built from the stored counts, the baseline prior
        whose tag equals the cluster type, the component count, the
        identifier, the cluster type and (if present) the parsed sites.

    Raises:
        ValueError: if ``cluster_name`` does not start with
            ``cluster_<digits>``.
        IOError: if no baseline prior with a matching tag is found.
    """
    match = re.match('cluster_([0-9]+)', cluster_name)
    if match is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("Invalid cluster name: %r" % (cluster_name,))
    identifier = int(match.group(1))

    counts, counts_gap = read_counts(cluster_parser, cluster_name)
    components = int(cluster_parser.get('Cluster', '%s_components' % cluster_name))
    cluster_type = cluster_parser.get('Cluster', '%s_type' % cluster_name)

    sites = None
    sites_option = '%s_sites' % cluster_name
    if cluster_parser.has_option('Cluster', sites_option):
        sites_str = cluster_parser.get('Cluster', sites_option)
        sites = dpm_subset_t(cluster_type)
        for elem in parse_partition_elements(sites_str):
            sites.insert(elem)

    alpha = None
    alpha_gap = None
    # No break on purpose: as before, the last matching tag wins.
    for tag, prior in zip(sampler_config.baseline_tags, sampler_config.baseline_priors):
        if tag == cluster_type:
            # Transpose the prior once into real lists, so that slicing
            # works whether or not map()/zip() return lists.
            transposed = [list(column) for column in zip(*prior)]
            alpha = transposed[0:4]
            alpha_gap = transposed[4]

    if alpha is None or alpha_gap is None:
        raise IOError("Baseline prior not found in sampler config.")

    return cluster_t(counts, counts_gap, alpha, alpha_gap, components,
                     identifier, cluster_type, sites=sites)
def generate_cluster(sequences, dpm_subset, idx, sampler_config, index_error = True):
    """Build a cluster_t from the sites contained in ``dpm_subset``.

    Each element ``r`` of ``dpm_subset`` supplies ``r.index()`` (sequence
    and start position), ``r.length()`` and ``r.reverse()``. For every
    covered position the four nucleotide entries and the gap entry (index 4)
    of ``sequences[s][p+j]`` are accumulated. For reverse sites the
    nucleotide counts are complemented via ``DNA.complement`` and written
    to the mirrored motif position.

    Parameters:
        sequences: indexed as ``sequences[s][p][k]`` with ``k`` in 0..4.
        dpm_subset: iterable, sized collection of sites that also provides
            ``model_id()`` (an object with a ``length`` attribute).
        idx: identifier handed through to ``cluster_t``.
        sampler_config: passed on to ``get_baseline_prior``.
        index_error: if true (default) an out-of-range position re-raises
            the ``IndexError``; otherwise a warning is written to stderr
            and the position is skipped.

    Returns:
        The ``cluster_t`` constructed from the counts, the expanded
        baseline prior, the number of sites, ``idx``, the model id and
        ``dpm_subset`` itself as ``sites``.

    Raises:
        IndexError: for an out-of-range sequence position when
            ``index_error`` is true.
    """
    length = dpm_subset.model_id().length
    counts = [[0.0] * length for _ in range(4)]
    counts_gap = [0.0] * length
    baseline_prior = get_baseline_prior(dpm_subset.model_id(), sampler_config)
    # Repeat the first pseudocount of each nucleotide for every motif position.
    alpha = [[baseline_prior[i][0]] * length for i in range(4)]
    # Sequence repetition; presumably baseline_prior[4] is a one-element
    # list -- TODO confirm.
    alpha_gap = baseline_prior[4] * length
    components = len(dpm_subset)

    for r in dpm_subset:
        s = r.index()[0]
        p = r.index()[1]
        site_length = r.length()
        reverse = r.reverse()
        # loop over the motif
        for j in range(site_length):
            # Read the whole column before touching any count. The old
            # probe used the loop variable `i` before it was assigned,
            # which only worked through Python 2 comprehension leakage.
            try:
                column = sequences[s][p + j]
                if reverse:
                    nucleotides = [column[DNA.complement(i)] for i in range(4)]
                else:
                    nucleotides = [column[i] for i in range(4)]
                gap = column[4]
            except IndexError:
                if index_error:
                    raise
                sys.stderr.write("Warning: index %d:%d out of range!\n" % (s, p + j))
                continue

            # Reverse sites are stored mirrored along the motif.
            target = site_length - j - 1 if reverse else j
            for i in range(4):
                counts[i][target] += nucleotides[i]
            # NOTE(review): gap counts use the forward offset j even for
            # reverse sites, as in the original -- confirm this is intended.
            counts_gap[j] += gap

    return cluster_t(counts, counts_gap, alpha, alpha_gap, components, idx,
                     dpm_subset.model_id(), sites=dpm_subset)