def test_incremental_assembler_constructor():
    """Check the state exposed by an assembler built from s1 and s2."""
    assembler = IncrementalAssembler([s1, s2])

    # Statements and their evidences are indexed by statement hash.
    assert assembler.prepared_stmts == [s1, s2]
    assert assembler.stmts_by_hash == {s1h: s1, s2h: s2}
    assert assembler.evs_by_stmt_hash == {s1h: [ev1], s2h: [ev2]}, \
        assembler.evs_by_stmt_hash

    # A single refinement edge links the two statements, and the
    # refinements graph contains both hashes as nodes.
    assert assembler.refinement_edges == {(s1h, s2h)}
    graph_nodes = set(assembler.refinements_graph.nodes())
    assert graph_nodes == {s1h, s2h}

    # s1 collects the evidence of both statements, s2 only its own.
    supporting_s1 = set(assembler.get_all_supporting_evidence(s1h))
    assert supporting_s1 == {ev1, ev2}
    supporting_s2 = set(assembler.get_all_supporting_evidence(s2h))
    assert supporting_s2 == {ev2}
def test_incremental_assembler_add_statement_duplicate():
    """Adding s3 is expected to contribute new evidence only.

    The delta must report no new statements and no new refinements, and
    the evidence ev3 must show up among the supporting evidence of s1.
    """
    ev3 = Evidence('eidos', text='3')
    s3 = Influence(e1, e2, ev3)
    s3h = s3.get_hash(matches_fun=location_matches_compositional)

    assembler = IncrementalAssembler([s1, s2])
    delta = assembler.add_statements([s3])

    assert not delta.new_stmts, delta.new_stmts
    assert delta.new_evidences == {s3h: [ev3]}, delta.new_evidences
    assert not delta.new_refinements, delta.new_refinements
    # TODO: test beliefs

    supporting_s1 = set(assembler.get_all_supporting_evidence(s1h))
    assert supporting_s1 == {ev1, ev2, ev3}
    supporting_s2 = set(assembler.get_all_supporting_evidence(s2h))
    assert supporting_s2 == {ev2}
def test_post_processing_new_stmts():
    """Check the single new statement reported after an incremental add."""
    # Work on copies so the module-level statements are not touched.
    initial_stmt, added_stmt = copy.deepcopy([s1, s2])
    assembler = IncrementalAssembler([initial_stmt])
    delta = assembler.add_statements([added_stmt])

    assert len(delta.new_stmts) == 1
    new_stmt = next(iter(delta.new_stmts.values()))
    assert new_stmt.subj.concept.name == 'crop'

    # Check that we added annotations
    annotations = new_stmt.evidence[0].annotations
    assert 'agents' in annotations
    assert annotations['agents'] == {
        'raw_text': ['some_text2', 'some_text2']
    }, annotations['agents']
def test_incremental_assembler_add_statement_new_refinement():
    """Adding s4 must yield a new statement, evidence and refinements.

    The delta is expected to report s4 as the only new statement, its
    evidence under its hash, and refinement edges from both s1 and s2
    to s4. The evidence ev4 must then be part of the supporting evidence
    of s1, s2 and s4.
    """
    ev4 = Evidence('eidos', text='4')
    s4 = Influence(e2, e4, ev4)
    s4h = s4.get_hash(matches_fun=location_matches_compositional)

    ia = IncrementalAssembler([s1, s2])
    delta = ia.add_statements([s4])

    # The original `assert delta.new_stmts, {s4h: s4}` only checked
    # truthiness (the dict was the failure message). Compare the keys
    # explicitly; the returned statement objects may have been
    # post-processed, so object equality with s4 is not assumed here.
    assert set(delta.new_stmts) == {s4h}, delta.new_stmts
    assert delta.new_evidences == {s4h: [ev4]}, delta.new_evidences
    assert delta.new_refinements == {(s1h, s4h), (s2h, s4h)}, \
        delta.new_refinements
    # TODO: test beliefs

    assert set(ia.get_all_supporting_evidence(s1h)) == {ev1, ev2, ev4}
    assert set(ia.get_all_supporting_evidence(s2h)) == {ev2, ev4}
    assert set(ia.get_all_supporting_evidence(s4h)) == {ev4}
def test_post_processing_all_stmts():
    """Check the first statement returned by ``get_statements``.

    Verifies the concept name, the flattened grounding stored under
    ``WM_FLAT`` and the ``agents`` evidence annotation.
    """
    # Work on copies so the module-level statements are not touched.
    stmts = copy.deepcopy([s1, s2])
    ia = IncrementalAssembler(stmts)
    first_stmt = ia.get_statements()[0]

    # Check that we normalized concept names
    assert first_stmt.subj.concept.name == 'agriculture'

    # Check that we added flattened groundings
    expected_flat = [{'grounding': 'wm/concept/agriculture',
                      'name': 'agriculture',
                      'score': 1.0}]
    actual_flat = first_stmt.subj.concept.db_refs['WM_FLAT']
    # Report the actual value on failure; the expected one is visible
    # right above.
    assert actual_flat == expected_flat, actual_flat

    # Check that we added annotations
    annotations = first_stmt.evidence[0].annotations
    assert 'agents' in annotations
    assert annotations['agents'] == {
        'raw_text': ['some_text1', 'some_text2']
    }, annotations['agents']
def assemble(self):
    """Assemble the prepared statements of the corpus.

    For every record in ``self.dart_records`` the prepared statements
    are fetched from the DB by document ID, reader and reader version.
    All of them are handed to a single IncrementalAssembler; its output
    is stored in ``self.assembled_stmts`` and the number of assembled
    statements in ``self.metadata['num_statements']``.
    """
    prepared = [
        stmt
        for rec in self.dart_records
        for stmt in self.sc.db.get_statements_for_document(
            document_id=rec['document_id'],
            reader=rec['reader'],
            reader_version=rec['reader_version'])
    ]
    assembler = IncrementalAssembler(prepared)
    self.assembled_stmts = assembler.get_statements()
    self.metadata['num_statements'] = len(self.assembled_stmts)
def test_apply_grounding_curation():
    """Applying a grounding curation replaces the subject's WM grounding.

    After the curation the first WM grounding entry of the subject is
    expected to hold the curated components, each with a score of 1.0.
    """
    old_grounding = [('theme1', 0.8), None, ('process', 0.7), None]
    new_grounding = ['theme2', 'property2', None, None]

    def side(subj_concept):
        # Build a fresh subj/obj description for one side of the curation.
        return {"subj": {"factor": 'x', "concept": subj_concept},
                "obj": {"factor": 'y', "concept": 'z'}}

    curation = {
        "before": side(old_grounding),
        "after": side(new_grounding),
    }

    subj_concept = Concept('x', db_refs={'WM': [old_grounding]})
    stmt = Influence(Event(subj_concept), Event('y'))
    IncrementalAssembler.apply_grounding_curation(stmt, curation)

    expected = [('theme2', 1.0), ('property2', 1.0), None, None]
    assert stmt.subj.concept.db_refs['WM'][0] == expected
def assemble(self):
    """Run assembly on the prepared statements.

    This function loads all the prepared statements associated with the
    corpus and then runs assembly on them. Statements are loaded per
    record using the record's ``storage_key``. The assembled statements
    are stored in ``self.assembled_stmts`` and their count in
    ``self.metadata['num_statements']``.
    """
    # Logging arguments are passed separately so that formatting is
    # deferred to the logging framework.
    logger.info('Loading statements from DB for %d records',
                len(self.dart_records))
    all_stmts = []
    for record in tqdm.tqdm(self.dart_records):
        all_stmts.extend(
            self.sc.db.get_statements_for_record(record['storage_key']))

    logger.info('Instantiating incremental assembler with %d statements',
                len(all_stmts))
    ia = IncrementalAssembler(all_stmts)

    logger.info('Getting assembled statements')
    self.assembled_stmts = ia.get_statements()
    num_assembled = len(self.assembled_stmts)
    logger.info('Got %d assembled statements', num_assembled)
    self.metadata['num_statements'] = num_assembled
def load_project(self, project_id, record_keys=None):
    """Create an IncrementalAssembler for a project and register it.

    Parameters
    ----------
    project_id :
        Identifier of the project. Used to look up the project's records
        (when ``record_keys`` is not given) and curations, and as the key
        under which the assembler is stored in ``self.assemblers``.
    record_keys :
        Optional iterable of record keys whose prepared statements should
        be loaded. If None, the keys are fetched from the DB for the
        project.
    """
    # Fall back to the records registered for the project.
    if record_keys is None:
        record_keys = self.db.get_records_for_project(project_id)

    # Gather the prepared statements of all selected records.
    stmts = [
        stmt
        for key in record_keys
        for stmt in self.db.get_statements_for_record(key)
    ]

    # The assembler is initialized with the project's curations.
    project_curations = self.get_project_curations(project_id)
    self.assemblers[project_id] = IncrementalAssembler(
        stmts, curations=project_curations)