Пример #1
0
def positivity(address):
    """Write the average positivity score of every canopy member.

    Each non-blank line of the file at ``address`` is read as one canopy: a
    whitespace-separated list of member tags. A canopy's score is the
    fraction of its members for which ``membertag_to_class(member) == 1``.
    Every member occurrence receives the score of the canopy it appears in,
    and a member's final score is the mean of all scores it received.

    One ``"<member> <score>"`` line per member is written to the path held
    in the module-level name ``positivity_outputs``. Blank input lines are
    skipped (they previously caused a division by zero).

    Args:
        address: Path of the canopy output file to read.
    """
    # member -> [running sum of canopy scores, number of scores received].
    # A plain dict is used for accumulation so that "first time seen" does
    # not have to be detected by catching an exception from the BTree.
    totals = {}
    with open(address, 'r') as f:
        for line in f:
            canopy_members = line.split()
            if not canopy_members:
                # Blank line: there is no canopy, and dividing by its
                # size would raise ZeroDivisionError.
                continue
            number_of_positive = 0
            for member in canopy_members:
                if membertag_to_class(member) == 1:
                    number_of_positive += 1
            p_score = float(number_of_positive) / len(canopy_members)
            for member in canopy_members:
                entry = totals.get(member)
                if entry is None:
                    totals[member] = [p_score, 1]
                else:
                    entry[0] += p_score
                    entry[1] += 1

    # The averages are stored in a BTree, as the original code did, so the
    # order in which items() yields members for the output is unchanged.
    positivity_scores = BTree()
    for member, (score_sum, count) in totals.items():
        positivity_scores[member] = score_sum / count

    output = ''.join(
        key + ' ' + str(value) + '\n'
        for key, value in positivity_scores.items()
    )
    with open(positivity_outputs, 'w') as f:
        f.write(output)
Пример #2
0
def train_set_positivity():
    """Split ``positivity_outputs.txt`` into one score file per sample.

    Each non-blank line of ``positivity_outputs.txt`` is split on
    whitespace; the first field is split on ``"_"`` and its first two
    pieces are taken as the sample name and the sequence number, and the
    second field is taken as the score (kept as text, not parsed).

    For every sample, a file named after the sample is written inside the
    directory held in the module-level name ``train_set_positivity_scores``,
    containing one ``"<seq_number> <score>"`` line per input line of that
    sample, in input order. Blank input lines are skipped (they previously
    raised IndexError).

    Raises:
        IndexError: If a non-blank line has no ``"_"`` in its first field
            or has no second field.
    """
    # sample -> list of (seq_number, score) in the order they were read.
    grouped = {}
    with open("positivity_outputs.txt", "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank line: nothing to record.
                continue
            tag_parts = fields[0].split("_")
            sample = tag_parts[0]
            seq_number = tag_parts[1]
            score = fields[1]
            grouped.setdefault(sample, []).append((seq_number, score))

    # The groups are stored in a BTree, as the original code did, so the
    # order in which items() yields samples is unchanged.
    data = BTree()
    for sample, entries in grouped.items():
        data[sample] = entries

    for key, val in data.items():
        with open(os.path.join(train_set_positivity_scores, key), "w") as f:
            for seq_number, score in val:
                f.write(seq_number + " " + score + "\n")