def setup_belief(include_more_specific=False):
    """Build a BeliefEngine backed by a CountsScorer trained on curated data.

    Parameters
    ----------
    include_more_specific : bool
        Passed through to the CountsScorer. When True, the scorer is also
        fitted with the evidence collected from every statement listed in
        each training statement's ``supports``.

    Returns
    -------
    tuple
        ``(belief_engine, stmts_copy, prior_probs)``: the engine wrapping the
        trained scorer, a copy of the curated test statements list, and the
        second column of ``predict_proba`` computed *without* any extra
        evidence.
    """
    # Model and scorer over every source found in the curated statements
    model = LogisticRegression()
    sources = CountsScorer.get_all_sources(test_stmts_cur)
    scorer = CountsScorer(model, sources,
                          include_more_specific=include_more_specific)

    # One flat evidence list per training statement, gathered from its
    # supporting statements; stays None when the option is off
    extra_evidence = None
    if include_more_specific:
        extra_evidence = []
        for stmt in test_stmts_cur:
            supporting_evs = []
            for supp in stmt.supports:
                supporting_evs.extend(supp.evidence)
            extra_evidence.append(supporting_evs)

    # Train on the curated statements (plus the extra evidence, if any)
    scorer.fit(test_stmts_cur, y_arr_stmts_cur, extra_evidence)

    # Prior probabilities: predictions made without the extra evidence
    prior_probs = scorer.predict_proba(test_stmts_cur)[:, 1]

    # Belief engine that callers use to check the same beliefs get set
    engine = BeliefEngine(scorer=scorer)

    # Hand back a shallow copy so callers don't work on the global list.
    # NOTE(review): a shallow copy presumably still shares the statement
    # objects with the global list -- confirm that is acceptable.
    return engine, copy(test_stmts_cur), prior_probs
def test_fit_stmts_predict_stmts():
    """Fit a CountsScorer on statements and check prediction output shapes.

    ``predict_proba`` and ``predict_log_proba`` must return one row per
    statement with two columns; ``predict`` must return one value per
    statement.
    """
    scorer = CountsScorer(LogisticRegression(),
                          ['reach', 'sparser', 'signor'])
    scorer.fit(test_stmts, y_arr_stmts)

    n_stmts = len(test_stmts)
    two_col_msg = (
        'prediction results should have dimension (# stmts, # classes)')

    # Class probabilities
    assert scorer.predict_proba(test_stmts).shape == (n_stmts, 2), \
        two_col_msg
    # Log-probabilities have the same layout
    assert scorer.predict_log_proba(test_stmts).shape == (n_stmts, 2), \
        two_col_msg
    # Hard predictions are one-dimensional
    assert scorer.predict(test_stmts).shape == (n_stmts,), \
        'prediction results should have dimension (# stmts)'
def test_hybrid_scorer():
    """Check that HybridScorer merges CountsScorer and SimpleScorer beliefs.

    A CountsScorer is trained on the reader sources only, while HPRD
    evidence is left to the default SimpleScorer. For every statement the
    expected hybrid belief is:

    * the CountsScorer probability alone if the statement has no HPRD
      evidence;
    * otherwise ``1 - (1 - p_skl) * (1 - p_hprd)``, where
      ``p_hprd = 1 - (syst + rand ** n_hprd)`` uses the SimpleScorer prior
      probabilities for HPRD and the number of HPRD evidences.
    """
    # The sources for this sample (test_stmts_cur) include only: trips,
    # sparser, medscan, hprd, and reach. Of these, hprd is set aside to be
    # scored by the SimpleScorer and the others by the CountsScorer.
    skl_sources = ['trips', 'sparser', 'medscan', 'reach']
    cs = CountsScorer(LogisticRegression(), skl_sources)
    # Train on curated stmt data
    cs.fit(test_stmts_cur, y_arr_stmts_cur)
    # Predictions from the sklearn-based scorer alone, for later comparison
    cs_beliefs = cs.predict_proba(test_stmts_cur)[:, 1]

    # Default SimpleScorer and its prior probabilities for HPRD
    ss = default_scorer
    hprd_rand = ss.prior_probs['rand']['hprd']
    hprd_syst = ss.prior_probs['syst']['hprd']

    # Instantiate the HybridScorer, check that all sources are accounted
    # for, and score the statements
    hs = HybridScorer(cs, ss)
    hs.check_prior_probs(test_stmts_cur)
    hybrid_beliefs = hs.score_statements(test_stmts_cur)

    # Work out what each belief should be from the skl-predicted belief
    # and the HPRD evidence handled by the simple scorer
    expected_beliefs = []
    for cs_belief, stmt in zip(cs_beliefs, test_stmts_cur):
        hprd_count = sum(1 for ev in stmt.evidence
                         if ev.source_api == 'hprd')
        if not hprd_count:
            # No HPRD evidence: the skl-predicted belief is used as-is
            expected_beliefs.append(cs_belief)
            continue
        hprd_belief = 1 - (hprd_syst + hprd_rand ** hprd_count)
        expected_beliefs.append(1 - (1 - cs_belief) * (1 - hprd_belief))

    # Diagnostics are only produced when the comparison fails
    assert np.allclose(hybrid_beliefs, expected_beliefs), (
        'hybrid beliefs %r do not match expected beliefs %r'
        % (list(hybrid_beliefs), expected_beliefs))
def setup_belief(include_more_specific=False):
    """Build a BeliefEngine backed by a CountsScorer trained on curated data.

    This definition accepts the same optional ``include_more_specific``
    flag as the other ``setup_belief`` in this module, so that whichever
    definition ends up bound to the name, calls with or without the flag
    keep working.

    Parameters
    ----------
    include_more_specific : bool
        Passed through to the CountsScorer. When True, the scorer is also
        fitted with the evidence collected from every statement listed in
        each training statement's ``supports``. Defaults to False.

    Returns
    -------
    tuple
        ``(belief_engine, stmts_copy, probs)``: the engine wrapping the
        trained scorer, a copy of the curated test statements list, and the
        second column of ``predict_proba`` on the curated statements
        (computed without extra evidence).
    """
    # Make a model
    lr = LogisticRegression()
    # Get all the sources
    source_list = CountsScorer.get_all_sources(test_stmts_cur)
    cs = CountsScorer(lr, source_list,
                      include_more_specific=include_more_specific)
    # Gather per-statement evidence from supporting statements if requested
    if include_more_specific:
        extra_evidence = [
            [ev for supp in stmt.supports for ev in supp.evidence]
            for stmt in test_stmts_cur
        ]
    else:
        extra_evidence = None
    # Train on curated stmt data, with extra evidence if any
    cs.fit(test_stmts_cur, y_arr_stmts_cur, extra_evidence)
    # Run predictions on test statements without extra evidence
    probs = cs.predict_proba(test_stmts_cur)[:, 1]
    # Belief engine for the trained model; callers check that it sets
    # these same beliefs on the statements
    be = BeliefEngine(scorer=cs)
    # Make a shallow copy of the test stmts list so callers don't work on
    # the global list directly.
    # NOTE(review): a shallow copy presumably still shares the statement
    # objects with the global list -- confirm that is acceptable.
    test_stmts_copy = copy(test_stmts_cur)
    return be, test_stmts_copy, probs