Exemplo n.º 1
0
def show_distance(num=0):
	"""Show a bar chart built from the result of ``minima`` for one PPR.

	The PPR at index ``num`` of the first loaded genome is passed to
	``minima`` together with every loaded genome.  Element 0 of the result
	supplies the x tick labels and element 1 supplies the bar heights.

	Args:
		num: Index into ``genomes[0].pprs`` selecting the PPR to plot
			(default 0).
	"""
	genomes = PPR.load_genomes()

	ppr = genomes[0].pprs[num]
	closest = minima(ppr, genomes)
	labels, heights = closest[0], closest[1]
	left = range(len(labels))

	# The ticks below sit at l + 0.5, i.e. the middle of a width-1.0 bar whose
	# left edge is at l.  Ask for that alignment explicitly: assuming plt is
	# matplotlib.pyplot (TODO confirm), releases >= 2.0 centre bars on x by
	# default, which would push every label half a bar to the right.
	plt.bar(left, heights, width=1.0, align='edge')
	plt.xticks([l + 0.5 for l in left], labels, rotation=90)
	plt.show()
Exemplo n.º 2
0
def ppr_distance():
	"""Write one row per genome to 'output/Average_PPR_distance.dat'.

	Loads all genomes and keeps those for which ``len(genome) > 50``.  For
	each kept genome a row is built from ``short_name(genome.name)``
	followed by the list returned by ``get_average(genome, genomes)``.  The
	header row is an empty cell followed by ``reduce_name(genome.name)`` for
	each kept genome.  Header, rows and the output path are handed to
	``utils.write_data``.  A progress line is printed for every genome.
	"""
	genomes = [g for g in PPR.load_genomes() if len(g) > 50]
	count = len(genomes)

	data = []
	for i, g in enumerate(genomes, 1):
		label = short_name(g.name)
		# Parenthesised single-argument print: same output on Python 2,
		# and no longer a syntax error on Python 3.
		print("Genome {}/{}: {} ({})".format(i, count, g.name, label))
		data.append([label] + get_average(g, genomes))

	# NOTE(review): the header uses reduce_name() while the row labels use
	# short_name() -- confirm this difference is intentional.
	header = [''] + [reduce_name(g.name) for g in genomes]
	utils.write_data(header, data, 'output/Average_PPR_distance.dat')
Exemplo n.º 3
0
def get_groups(threshold=0.3):
	"""Attempt to group similar PPRs.

	Every PPR of every loaded genome is visited once, in load order.  A
	group is admissible for a PPR when the largest value of
	``ppr.distance(member, 'MSE')[0]`` over the group's members is less
	than or equal to ``threshold``.  The PPR joins the admissible group with
	the lowest mean distance (the earliest such group on ties); if no group
	is admissible it starts a new group.  Because groups are built
	incrementally, the result depends on the visiting order.

	Progress is written to ``stdout`` on a single, carriage-return
	refreshed line, which is blanked before returning.

	Args:
		threshold: Maximum allowed distance between the PPR and any member
			of a group it may join (default 0.3).

	Returns:
		A list of groups, each group being a list of PPR objects.
	"""
	stdout.write("\rLoading...")
	stdout.flush()

	genomes = PPR.load_genomes()

	groups = []

	total = sum(sum(1 for _ in g.pprs) for g in genomes)
	done = 0
	# The width of the "done" counter is fixed to the number of digits in
	# total so the progress line does not jitter as the count grows.
	fmt = "\rGrouping ({{:{}d}}/{{}}) | {{:2.1f}}%, {{:.1f}} per group     ".format(
			len(str(total)))
	for genome in genomes:
		for ppr in genome.pprs:
			# Collect admissible groups together with their mean distance.
			# The distances to a group's members are computed once and
			# reused for both the threshold check and the mean (assumes
			# ppr.distance is deterministic and side-effect free -- TODO
			# confirm).
			possible = []
			averages = []
			for group in groups:
				distances = [ppr.distance(member, 'MSE')[0] for member in group]
				if max(distances) <= threshold:
					possible.append(group)
					averages.append(sum(distances) / float(len(group)))
			if not possible:
				# No admissible group: start a new one.
				groups.append([ppr])
			else:
				# Join the admissible group with the lowest mean distance;
				# index(min(...)) keeps the earliest group on ties.
				possible[averages.index(min(averages))].append(ppr)
			# Write progress info.
			done += 1
			stdout.write(fmt.format(done, total, 100.0 * done / float(total),
				done / float(len(groups))))
			stdout.flush()

	# Blank out the progress line and return the cursor to column 0.
	stdout.write("\r" + (" " * 60) + "\r")
	stdout.flush()
	return groups