Пример #1
0
	def run(self):
		"""Score every item on 12 emotion classes and save recommendations.

		Steps performed, in order:

		1. If ``self.items`` is None, load it as JSON from ``self.input_path``.
		2. Obtain the segmented text and the segment start/end markers from
		   ``self.get_text()`` and build one sparse feature matrix per item
		   (one row per segment) using ``self.vectorize_document`` and
		   ``self.feature_extraction``.
		3. For each of the 12 classes: read three hyper-parameters from
		   ``parameters/<class number>/ap_weights{_m1,,_m3}.txt``, load the
		   gzip-pickled model ``models/<class name>_model.pk`` or, when it
		   cannot be loaded, fit a new ``APWeights`` model on ``self.X`` and
		   the matching column of ``self.Y`` and cache it at that path. Then
		   predict on the test matrices and collect ``self.model.P_test`` as
		   per-segment relevance weights.
		4. Compute the pairwise similarity of the items' emotion score
		   vectors with ``self.cosine_measure`` and pick the recommendations
		   with ``self.n_most_similar``.
		5. Add the results to each item dict and dump ``self.items`` as JSON
		   to ``self.output_path``.

		Side effects: assigns ``self.items`` (when it was None),
		``self.num_class`` and ``self.model``; mutates the item dicts in
		``self.items``; may create model files under ``models/``; writes
		progress messages through ``write`` when ``self.debug`` is true.
		"""
		num_emotions = 12
		method = "ap_weights"

		def log(message):
			# Progress output is only emitted in debug mode.
			if self.debug:
				write(message)

		emotion_scores = []
		relevances = []

		# Loading test data and representing them in the feature space.
		log("[+] Loading items:".ljust(54, '.'))
		if self.items is None:
			with open(self.input_path) as input_file:
				self.items = json.load(input_file)
		test_data, starts, ends = self.get_text()
		X_test = []
		for sentences in test_data:
			item_relevances = []
			relevances.append(item_relevances)
			emotion_scores.append([0.] * num_emotions)
			cur_x = lil_matrix((len(sentences), len(self.dictionary.keys())))
			for row, sentence in enumerate(sentences):
				item_relevances.append([0.] * num_emotions)
				sword_vector = self.vectorize_document(sentence)
				sfeature_vector = self.feature_extraction(sword_vector)
				for key, val in sfeature_vector:
					cur_x[row, key] = val
			X_test.append(cur_x)
		log("[OK]\n")

		# Training and predicting each emotion class on the given data.
		log("[+] Modeling emotions:")
		for num_class in range(num_emotions):
			# self.num_class is 1-based; it is also used in the parameter path.
			self.num_class = num_class + 1
			cur_X = self.X
			cur_Y = self.Y[:, num_class:self.num_class]
			rating_class = self.rating_classes[num_class]

			# Each parameter file holds its value on the first line, between
			# ": " and "}".
			opt = []
			for suffix in ['_m1', '', '_m3']:
				param_path = 'parameters/%d/%s%s.txt' % (self.num_class, method, suffix)
				with open(param_path) as param_file:
					first_line = param_file.readline()
				opt.append(float(first_line.split(": ")[1].split("}")[0]))

			model_path = "models/%s_model.pk" % rating_class
			log(("\n" + " "*8 + "-> %s" % rating_class).ljust(55, '.'))
			try:
				# NOTE: pickle.load can execute arbitrary code; only load
				# model files that come from a trusted source.
				with gzip.open(model_path, 'rb') as model_file:
					self.model = pickle.load(model_file)
			except Exception:
				# Any failure to load the cached model (missing or unreadable
				# file) falls back to training a fresh one and caching it.
				log("\n")
				self.model = APWeights(20, l1=opt[0], l2=opt[1], l3=opt[2], reg=self)
				self.model.fit(cur_X, cur_Y)
				# Closing the gzip stream is what flushes the model to disk.
				with gzip.open(model_path, "wb") as model_file:
					pickle.dump(self.model, model_file)
			else:
				log("[OK]")

			pred = self.model.predict(X_test)
			for item_idx, val in enumerate(pred):
				emotion_scores[item_idx][num_class] = val.view(np.ndarray)[0]

			for item_idx, weights in enumerate(self.model.P_test):
				for seg_idx, w in enumerate(weights):
					relevances[item_idx][seg_idx][num_class] = w

		# Compute emotion-based recommendations
		log("\n[+] Generating recommendations".ljust(55, '.'))
		sim = np.zeros((len(X_test), len(X_test)))
		for i, v1 in enumerate(emotion_scores):
			for j, v2 in enumerate(emotion_scores):
				sim[i][j] = self.cosine_measure(v1, v2)
		log("[OK]")

		real_idxs = list(range(len(X_test)))

		# Write results into a file
		log("\n[+] Saving to output file".ljust(55, '.'))
		for item_idx, item in enumerate(self.items):
			item['segments'] = [
				{'text': seg,
				 'start': starts[item_idx][seg_idx],
				 'end': ends[item_idx][seg_idx],
				 'relevance_scores': relevances[item_idx][seg_idx]}
				for seg_idx, seg in enumerate(test_data[item_idx])
			]
			top, top_sim = self.n_most_similar(item_idx, sim, real_idxs)
			item['emotion_classes'] = self.rating_classes
			item['emotion_scores'] = emotion_scores[item_idx]
			item['emotion_rec'] = [self.items[idx]['id'] for idx in top]
			item['emotion_rec_scores'] = top_sim

		# NOTE(review): binary mode is kept from the original (Python 2);
		# a Python 3 port would need text mode here.
		with open(self.output_path, "wb") as output_file:
			json.dump(self.items, output_file)
		log("[OK]\n")
		print("[x] Finished.")