# NOTE(review): the line below is a whitespace-collapsed script fragment. It is
# truncated at both ends: `f` is bound outside this view (presumably by a
# `with open(...) as f:` header -- TODO confirm) and the body of
# evaluate_model() continues past the end of the line.
#
# Steps visible in the fragment, in order:
#   1. dill.load(f) -> pred_tagged_essays_train.
#   2. Open coref_output_folder + "test_crel_anatagged_essays_most_recent_code.dill"
#      in "rb" mode and dill.load it -> pred_tagged_essays_test.
#   3. config = get_config(coref_output_folder);
#      results_processor = ResultsProcessor(dbname="metrics_causal_model").
#   4. Log the start time and len(pred_tagged_essays_train)
#      (the inline comment expects 902).
#   5. cr_tags = get_cr_tags(...) over the train and test essays.
#   6. cv_folds is a single (train, test) pair, i.e. no cross-validation split
#      is made in this fragment.
#   7. evaluate_model(...) is declared to return float; when
#      down_sample_rate < 1.0 it starts an empty new_folds list. Everything
#      after that is outside this view.
#
# NOTE(review): as stored, everything after the first inline `#` on this
# physical line is a comment to the Python tokenizer; presumably the original
# line breaks were lost -- restore them before running.
# NOTE(review): dill.load, like pickle, can execute code while deserialising;
# presumably these .dill files are locally produced and trusted -- confirm.
pred_tagged_essays_train = dill.load(f) test_fname = coref_output_folder + "test_crel_anatagged_essays_most_recent_code.dill" with open(test_fname, "rb") as f: pred_tagged_essays_test = dill.load(f) # Get Test Data In Order to Get Test CRELS # load the test essays to make sure we compute metrics over the test CR labels config = get_config(coref_output_folder) results_processor = ResultsProcessor(dbname="metrics_causal_model") ######################################################## logger.info("Started at: " + str(datetime.datetime.now())) logger.info("Number of pred tagged essays %i" % len(pred_tagged_essays_train)) # should be 902 cr_tags = get_cr_tags(train_tagged_essays=pred_tagged_essays_train, tag_essays_test=pred_tagged_essays_test) cv_folds = [(pred_tagged_essays_train, pred_tagged_essays_test)] # type: List[Tuple[Any,Any]] def evaluate_model( collection_prefix: str, folds: List[Tuple[Any, Any]], extractor_fn_names_lst: List[str], cost_function_name: str, beta: float, ngrams: int, stemmed: bool, max_epochs: int, down_sample_rate=1.0) -> float: if down_sample_rate < 1.0: new_folds = [] # type: List[Tuple[Any, Any]]
# NOTE(review): the line below is a whitespace-collapsed script fragment that
# ends on the `if down_sample_rate < 1.0:` header inside evaluate_model(); the
# suite of that `if` and the rest of the function are outside this view.
#
# Steps visible in the fragment, in order:
#   1. test_config = get_config(test_folder);
#      tagged_essays_test = load_process_essays(**test_config).
#   2. fname = rnn_predictions_folder + "essays_train_bi_directional-True_..." ;
#      the file is opened in "rb" mode and dill.load-ed -> pred_tagged_essays.
#   3. Log the start time and len(pred_tagged_essays)
#      (the inline comment expects 902).
#   4. cr_tags = get_cr_tags(...) over the predicted-tag train essays and the
#      loaded test essays.
#   5. cv_folds = cross_validation(pred_tagged_essays, CV_FOLDS).
#   6. evaluate_model(...) is declared to return float and branches on
#      down_sample_rate < 1.0.
#
# NOTE(review): as stored, this physical line starts with `#`, so the whole
# line is a comment to the Python tokenizer; presumably the original line
# breaks were lost -- restore them before running.
# NOTE(review): dill.load, like pickle, can execute code while deserialising;
# presumably the .dill file is locally produced and trusted -- confirm.
# Get Test Data In Order to Get Test CRELS # load the test essays to make sure we compute metrics over the test CR labels test_config = get_config(test_folder) tagged_essays_test = load_process_essays(**test_config) ######################################################## fname = rnn_predictions_folder + "essays_train_bi_directional-True_hidden_size-256_merge_mode-sum_num_rnns-2_use_pretrained_embedding-True.dill" with open(fname, "rb") as f: pred_tagged_essays = dill.load(f) logger.info("Started at: " + str(datetime.datetime.now())) logger.info("Number of pred tagged essays %i" % len(pred_tagged_essays)) # should be 902 cr_tags = get_cr_tags(train_tagged_essays=pred_tagged_essays, tag_essays_test=tagged_essays_test) cv_folds = cross_validation(pred_tagged_essays, CV_FOLDS) # type: List[Tuple[Any,Any]] def evaluate_model(collection_prefix: str, folds: List[Tuple[Any, Any]], extractor_fn_names_lst: List[str], cost_function_name: str, beta: float, ngrams: int, stemmed: bool, max_epochs: int, down_sample_rate=1.0) -> float: if down_sample_rate < 1.0: