def p2v_iterations():
    """Fit ``p2v.out_score`` on one-hot encoded essay scores.

    Two datasets are assembled from ``d.get_essays``: one using
    ``training_and_prompt_filter()`` and one using
    ``testing_and_prompt_filter()``.  Each essay contributes its prompt id,
    its ``d.essay2vec`` embedding and a one-hot vector for the value of
    ``essay[config.targeted_field]``.  The second dataset is handed to
    ``fit`` as ``validation_data``.

    The one-hot vector has one slot per score in ``config.score_range``
    (both ends inclusive).  The hot slot is the score's offset from
    ``config.score_range[0]``; a fixed ``- 1`` offset is only correct when
    the range starts at 1, and otherwise wraps around or overflows the
    vector.
    """
    lowest_score = config.score_range[0]
    scores_count = (config.score_range[1] - lowest_score) + 1

    def build_arrays(row_count, filterer):
        # Pre-sized buffers: row_count is derived from training_set, which is
        # expected to match the number of essays the filter lets through.
        X_prompt = np.empty([row_count, 1])
        X_ess = np.empty([row_count, config.essay_embedding_size])
        y_score = np.empty([row_count, scores_count])
        for i, essay in enumerate(d.get_essays(filterer=filterer)):
            X_prompt[i] = np.array([essay["prompt_id"]])
            X_ess[i] = np.array(d.essay2vec(essay["essay_id"]))
            one_hot = np.zeros(scores_count)
            # Offset by the lowest valid score so the smallest score maps to
            # slot 0 whatever the configured range is.
            one_hot[essay[config.targeted_field] - lowest_score] = 1.0
            y_score[i] = one_hot
        return X_prompt, X_ess, y_score

    # training_set maps essay_id -> True (training) / False (testing), so the
    # sum of its values is the number of training essays.
    training_count = sum(training_set.values())
    X_prompt, X_ess, y_score = build_arrays(
        training_count, training_and_prompt_filter())

    testing_count = len(training_set) - training_count
    X_prompt_val, X_ess_val, y_score_val = build_arrays(
        testing_count, testing_and_prompt_filter())

    p2v.out_score.fit(
        [X_prompt, X_ess], y_score,
        verbose=1,
        batch_size=config.batch_size,
        nb_epoch=config.p2v_training_iterations,
        validation_data=([X_prompt_val, X_ess_val], y_score_val))
def p2v_iterations():
    """Fit ``p2v.out_score`` on scalar essay scores.

    Training and validation inputs are collected from ``d.get_essays`` with
    ``training_and_prompt_filter`` / ``testing_and_prompt_filter`` applied to
    ``config.targeted_prompts``.  Every essay yields its prompt id, its
    ``d.essay2vec`` embedding and ``score_to_float`` of
    ``essay[config.targeted_field]`` as the single-column target.  The
    testing data is passed to ``fit`` as ``validation_data``.
    """

    def collect(row_count, filterer):
        # One row per essay yielded by the filter; buffers are pre-sized from
        # the counts derived from training_set.
        prompts = np.empty([row_count, 1])
        embeddings = np.empty([row_count, config.essay_embedding_size])
        targets = np.empty([row_count, 1])
        for row, essay in enumerate(d.get_essays(filterer=filterer)):
            prompts[row] = np.array([essay["prompt_id"]])
            embeddings[row] = np.array(d.essay2vec(essay["essay_id"]))
            targets[row] = np.array(
                [score_to_float(essay[config.targeted_field])])
        return prompts, embeddings, targets

    # training_set maps essay_id -> True (training) / False (testing).
    training_count = sum(training_set.values())
    X_prompt, X_ess, y_score = collect(
        training_count,
        training_and_prompt_filter(config.targeted_prompts))

    testing_count = len(training_set) - training_count
    X_prompt_val, X_ess_val, y_score_val = collect(
        testing_count,
        testing_and_prompt_filter(config.targeted_prompts))

    validation = ([X_prompt_val, X_ess_val], y_score_val)
    p2v.out_score.fit(
        [X_prompt, X_ess], y_score,
        verbose=1,
        batch_size=config.batch_size,
        nb_epoch=config.p2v_training_iterations,
        validation_data=validation)
def test_all():
    """Run ``evaluate`` on the training essays, then on the testing essays.

    A heading line is printed before each evaluation.  Both essay streams
    come from ``d.get_essays`` restricted to ``config.targeted_prompts``.
    """
    # Single-argument parenthesised print emits the same text whether print
    # is a statement or a function.
    print("Training Set")
    training_essays = d.get_essays(
        filterer=training_and_prompt_filter(config.targeted_prompts))
    evaluate(training_essays)

    print("Testing Set")
    testing_essays = d.get_essays(
        filterer=testing_and_prompt_filter(config.targeted_prompts))
    evaluate(testing_essays)
def prompt_filter(prompts): counts = {} def filterer(essay): if essay["prompt_id"] in prompts: counts[essay["prompt_id"]] = (counts.get(essay["prompt_id"], 0) + 1) return counts[essay["prompt_id"]] < config.per_prompt_limit return False return filterer # # BEGIN Training and Testing Sets Selection # training_set = {} for essay in d.get_essays(filterer=prompt_filter(config.targeted_prompts)): if np.random.rand() > config.testing_slice_size: training_set[essay["essay_id"]] = True else: training_set[essay["essay_id"]] = False def training_and_prompt_filter(prompts): global training_set pf = prompt_filter(prompts) def filterer(essay): return pf(essay) and training_set[essay["essay_id"]] return filterer def testing_and_prompt_filter(prompts):
counts = {} def filterer(essay): if essay["prompt_id"] in config.targeted_prompts: counts[essay["prompt_id"]] = (counts.get(essay["prompt_id"], 0) + 1) return counts[essay["prompt_id"]] < config.per_prompt_limit return False return filterer # # BEGIN Training and Testing Sets Selection # training_set = {} for essay in d.get_essays(filterer=prompt_filter()): if np.random.rand() > config.testing_slice_size: training_set[essay["essay_id"]] = True else: training_set[essay["essay_id"]] = False def training_and_prompt_filter(): global training_set pf = prompt_filter() def filterer(essay): return pf(essay) and training_set[essay["essay_id"]] return filterer def testing_and_prompt_filter():