Exemplo n.º 1
0
def plot_sim_distance(inputfile, outfile, simhash_type, proto_type,
		avg_dist=True):
	"""
	Dump per-site simhash distances from a proto file into a text file.

	inputfile: path handed to read_proto_from_file to populate the proto.
	outfile: path of the text file to write (opened in "w" mode).
	simhash_type: value resolved through get_simhash_type(simhash_type, True).
	proto_type: name of the CD message class to load; only "LearnedSites"
		and "ObservedSites" are supported.
	avg_dist: forwarded unchanged to simhash_vector_distance.

	Output layout:
	  LearnedSites  -- one "<name>,<number of patterns>" line per site, then
	                   for each pattern a "pattern" line followed by one
	                   distance per line.
	  ObservedSites -- one "<name>,<number of observations>" line per site,
	                   followed by one distance per line.

	Raises Exception when proto_type is not one of the two supported names.
	"""
	simhash_type = get_simhash_type(simhash_type, True)
	sites = getattr(CD, proto_type)()
	read_proto_from_file(sites, inputfile)
	# Reject unsupported protos before touching outfile, so a bad call neither
	# truncates an existing file nor leaves an open handle behind.
	if proto_type not in ("LearnedSites", "ObservedSites"):
		raise Exception("Wrong proto! Only LearnedSites and ObservedSites can be used!")
	# The context manager closes the file even if a helper raises mid-write.
	with open(outfile, "w") as out_f:
		if proto_type == "LearnedSites":
			for learned_site in sites.site:
				out_f.write(learned_site.name + "," + str(len(learned_site.pattern)) + "\n")
				for pattern in learned_site.pattern:
					dist_list = simhash_vector_distance(pattern.item,
							avg_dist)
					out_f.write("pattern\n" + "\n".join(str(d) for d in
						dist_list) + "\n")
		else:
			for observed_site in sites.site:
				out_f.write(observed_site.name + "," + str(len(observed_site.observation)) + "\n")
				# Observations are first folded into a simhash item vector
				# for the selected simhash type.
				simhash_item_vector = aggregate_simhash(observed_site, simhash_type)
				dist_list = simhash_vector_distance(simhash_item_vector,
						avg_dist)
				out_f.write("\n".join(str(d) for d in dist_list) + "\n")
Exemplo n.º 2
0
def plot_simhash(inputfile, outfile, simhash_type, proto_type):
	"""
	Dump the simhash values of every site in a proto file into a text file.

	inputfile: path handed to read_proto_from_file to populate the proto.
	outfile: path of the text file to write (opened in "w" mode).
	simhash_type: value resolved through get_simhash_type; the result is
		used as the attribute name read from each observation.
	proto_type: name of the CD message class to load; only "LearnedSites"
		and "ObservedSites" are supported.

	Output layout:
	  LearnedSites  -- one "<name>,<sum of item counts>" line per site, then
	                   each item's simhash as 16 hex digits, repeated
	                   item.count times, one per line.
	  ObservedSites -- one "<name>,<number of observations>" line per site,
	                   then one 16-hex-digit simhash line per observation.

	Raises Exception when proto_type is not one of the two supported names.
	"""
	simhash_type = get_simhash_type(simhash_type)
	sites = getattr(CD, proto_type)()
	read_proto_from_file(sites, inputfile)
	# Reject unsupported protos before touching outfile, so a bad call neither
	# truncates an existing file nor leaves an open handle behind.
	if proto_type not in ("LearnedSites", "ObservedSites"):
		raise Exception("Wrong proto! Only LearnedSites and ObservedSites can be used!")
	# The context manager closes the file even if a write or lookup raises.
	with open(outfile, "w") as out_f:
		if proto_type == "LearnedSites":
			for site in sites.site:
				# The header reports the total count over all pattern items.
				observation_size = sum(item.count
						for pattern in site.pattern
						for item in pattern.item)
				out_f.write(site.name + "," + str(observation_size) + "\n")
				for pattern in site.pattern:
					for item in pattern.item:
						item_str = "%0.16x" % item.simhash
						# Emit the simhash once per counted occurrence.
						out_f.write("\n".join([item_str] * item.count) + "\n")
		else:
			for site in sites.site:
				out_f.write(site.name + "," + str(len(site.observation)) + "\n")
				for observation in site.observation:
					simhash_str = "%0.16x" % getattr(observation, simhash_type)
					out_f.write(simhash_str + "\n")
Exemplo n.º 3
0
def build_site_simhash_dict(observed_sites):
	"""
	Index an ObservedSites message by site name.

	Returns a pair of dicts keyed by site name:
	  - the first maps each name to the set of simhash values read from
	    that site's observations (entries sharing a name are merged);
	  - the second maps each name to the first observed site seen with
	    that name.
	The simhash attribute to read is chosen via get_simhash_type from
	observed_sites.config.simhash_type.
	"""
	valid_instance(observed_sites, CD.ObservedSites)
	simhash_field = get_simhash_type(observed_sites.config.simhash_type)
	simhashes_by_name = {}
	site_by_name = {}
	for site in observed_sites.site:
		name = site.name
		if name not in simhashes_by_name:
			# First time this name shows up: remember the site itself too.
			simhashes_by_name[name] = set()
			site_by_name[name] = site
		simhashes_by_name[name].update(
				getattr(obs, simhash_field) for obs in site.observation)
	return simhashes_by_name, site_by_name