Пример #1
0
def p2v_iterations():
	"""Assemble training and validation arrays and call `p2v.out_score.fit`.

	Each essay contributes one row to three arrays: its prompt id, its
	`d.essay2vec` vector, and a one-hot vector over the configured score
	range. The training rows come from `training_and_prompt_filter()`, the
	validation rows from `testing_and_prompt_filter()`.
	"""
	class_count = (config.score_range[1] - config.score_range[0]) + 1

	def build_arrays(row_count, make_filter):
		# Arrays are allocated with np.empty, so any row the essay stream does
		# not reach keeps arbitrary contents; extra essays raise IndexError.
		prompt_ids = np.empty([row_count, 1])
		embeddings = np.empty([row_count, config.essay_embedding_size])
		targets = np.empty([row_count, class_count])

		for row, essay in enumerate(d.get_essays(filterer=make_filter())):
			prompt_ids[row] = np.array([essay["prompt_id"]])
			embeddings[row] = np.array(d.essay2vec(essay["essay_id"]))
			# NOTE(review): the slot is score - 1, not score - score_range[0];
			# presumably scores start at 1 -- confirm against the decoder.
			label = np.zeros(class_count)
			label[essay[config.targeted_field] - 1] = 1.0
			targets[row] = label

		return prompt_ids, embeddings, targets

	# training_set maps essay_id -> bool, so summing the values counts the
	# essays flagged for training; the remainder are the validation essays.
	training_count = sum(training_set.values())
	X_prompt, X_ess, y_score = build_arrays(training_count, training_and_prompt_filter)

	testing_count = len(training_set) - training_count
	X_prompt_val, X_ess_val, y_score_val = build_arrays(testing_count, testing_and_prompt_filter)

	p2v.out_score.fit(
		[X_prompt, X_ess],
		y_score,
		verbose=1,
		batch_size=config.batch_size,
		nb_epoch=config.p2v_training_iterations,
		validation_data=([X_prompt_val, X_ess_val], y_score_val)
	)
Пример #2
0
def p2v_iterations():
	"""Assemble training and validation arrays and call `p2v.out_score.fit`.

	Each essay contributes one row to three arrays: its prompt id, its
	`d.essay2vec` vector, and a single target value produced by
	`score_to_float` from the configured target field. Training rows come
	from `training_and_prompt_filter`, validation rows from
	`testing_and_prompt_filter`, both built for `config.targeted_prompts`.
	"""
	def build_arrays(row_count, make_filter):
		# Arrays are allocated with np.empty, so any row the essay stream does
		# not reach keeps arbitrary contents; extra essays raise IndexError.
		prompt_ids = np.empty([row_count, 1])
		embeddings = np.empty([row_count, config.essay_embedding_size])
		targets = np.empty([row_count, 1])

		selector = make_filter(config.targeted_prompts)
		for row, essay in enumerate(d.get_essays(filterer=selector)):
			prompt_ids[row] = np.array([essay["prompt_id"]])
			embeddings[row] = np.array(d.essay2vec(essay["essay_id"]))
			targets[row] = np.array([score_to_float(essay[config.targeted_field])])

		return prompt_ids, embeddings, targets

	# training_set maps essay_id -> bool, so summing the values counts the
	# essays flagged for training; the remainder are the validation essays.
	training_count = sum(training_set.values())
	X_prompt, X_ess, y_score = build_arrays(training_count, training_and_prompt_filter)

	testing_count = len(training_set) - training_count
	X_prompt_val, X_ess_val, y_score_val = build_arrays(testing_count, testing_and_prompt_filter)

	p2v.out_score.fit(
		[X_prompt, X_ess],
		y_score,
		verbose=1,
		batch_size=config.batch_size,
		nb_epoch=config.p2v_training_iterations,
		validation_data=([X_prompt_val, X_ess_val], y_score_val)
	)
Пример #3
0
def test_all():
	"""Run `evaluate` on the training essays, then on the testing essays.

	A heading line is printed before each evaluation. Both essay streams are
	restricted to `config.targeted_prompts`.
	"""
	# A parenthesised single argument prints the same text under the
	# Python 2 print statement as the unparenthesised form.
	print("Training Set")
	training_essays = d.get_essays(filterer=training_and_prompt_filter(config.targeted_prompts))
	evaluate(training_essays)

	print("Testing Set")
	testing_essays = d.get_essays(filterer=testing_and_prompt_filter(config.targeted_prompts))
	evaluate(testing_essays)
Пример #4
0
def prompt_filter(prompts):
	"""Build a stateful essay predicate limited to `prompts`.

	The returned function rejects any essay whose "prompt_id" is not in
	`prompts`. For the others it keeps a running per-prompt tally (counting
	every such essay, accepted or not) and accepts the essay while that
	tally is below `config.per_prompt_limit`.
	"""
	seen_per_prompt = {}

	def filterer(essay):
		if essay["prompt_id"] not in prompts:
			return False

		tally = seen_per_prompt.get(essay["prompt_id"], 0) + 1
		seen_per_prompt[essay["prompt_id"]] = tally
		# NOTE(review): the tally is compared after the increment, so at most
		# per_prompt_limit - 1 essays pass per prompt -- confirm intended.
		return tally < config.per_prompt_limit

	return filterer

#
# BEGIN Training and Testing Sets Selection
#
# Maps essay_id -> True (training) / False (testing) for every essay that
# prompt_filter admits. The split is a fresh random draw per essay, so it
# differs between runs unless NumPy's global RNG is seeded beforehand.
training_set = {}
for essay in d.get_essays(filterer=prompt_filter(config.targeted_prompts)):
	# A draw above testing_slice_size puts the essay in the training part.
	training_set[essay["essay_id"]] = bool(np.random.rand() > config.testing_slice_size)

def training_and_prompt_filter(prompts):
	"""Build a predicate selecting training essays for `prompts`.

	An essay is accepted when a fresh `prompt_filter(prompts)` accepts it and
	the module-level `training_set` maps its "essay_id" to a true value.
	`training_set` is consulted only for essays the prompt filter accepts.
	"""
	passes_prompt = prompt_filter(prompts)

	def filterer(essay):
		verdict = passes_prompt(essay)
		if not verdict:
			# Hand back the prompt filter's own falsy result unchanged.
			return verdict
		return training_set[essay["essay_id"]]

	return filterer

def testing_and_prompt_filter(prompts):
Пример #5
0
	counts = {}
	
	def filterer(essay):
		if essay["prompt_id"] in config.targeted_prompts:
			counts[essay["prompt_id"]] = (counts.get(essay["prompt_id"], 0) + 1)
			return counts[essay["prompt_id"]] < config.per_prompt_limit
		return False

	return filterer


#
# BEGIN Training and Testing Sets Selection
#
# Maps essay_id -> True (training) / False (testing) for every essay that
# prompt_filter admits. The split is a fresh random draw per essay, so it
# differs between runs unless NumPy's global RNG is seeded beforehand.
training_set = {}
for essay in d.get_essays(filterer=prompt_filter()):
	# A draw above testing_slice_size puts the essay in the training part.
	training_set[essay["essay_id"]] = bool(np.random.rand() > config.testing_slice_size)

def training_and_prompt_filter():
	"""Build a predicate selecting the training essays.

	An essay is accepted when a fresh `prompt_filter()` accepts it and the
	module-level `training_set` maps its "essay_id" to a true value.
	`training_set` is consulted only for essays the prompt filter accepts.
	"""
	passes_prompt = prompt_filter()

	def filterer(essay):
		verdict = passes_prompt(essay)
		if not verdict:
			# Hand back the prompt filter's own falsy result unchanged.
			return verdict
		return training_set[essay["essay_id"]]

	return filterer

def testing_and_prompt_filter():