Example #1
def test_running_pipeline():
    # From json file
    ap = AssemblyPipeline.from_json_file(test_json)
    assert ap
    # AssemblyPipeline has methods for length and iteration
    assert len(ap) == 5
    for step in ap:
        assert step
    assembled_stmts = ap.run(stmts)
    assert assembled_stmts
    assert len(assembled_stmts) == 2
    # By manually adding steps
    ap2 = AssemblyPipeline()
    ap2.append(filter_no_hypothesis)
    ap2.append(map_grounding)
    ap2.append(filter_grounded_only)
    ap2.append(map_sequence)
    ap2.append(run_preassembly, return_toplevel=False)
    assembled_stmts2 = ap2.run(stmts)
    assert assembled_stmts2
    assert len(assembled_stmts2) == 2
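For reference, the JSON file passed to AssemblyPipeline.from_json_file is, in INDRA's pipeline format, a list of step dictionaries with a "function" name and optional "args"/"kwargs". The sketch below is hypothetical (not the actual test_json); it shows a definition that would mirror the manually built five-step pipeline above.

import json

# Hypothetical pipeline definition mirroring the ap2.append(...) calls above;
# the real test_json used by the test may differ.
example_steps = [
    {'function': 'filter_no_hypothesis'},
    {'function': 'map_grounding'},
    {'function': 'filter_grounded_only'},
    {'function': 'map_sequence'},
    {'function': 'run_preassembly', 'kwargs': {'return_toplevel': False}},
]

with open('example_pipeline.json', 'w') as fh:
    json.dump(example_steps, fh, indent=1)

# AssemblyPipeline.from_json_file('example_pipeline.json') would then build
# the same pipeline as the manual ap2 construction.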
Example #2
                                   corpus_id,
                                   grounding_mode='compositional',
                                   extract_filter=['influence'])
    '''
    stmts = []
    for reader in reader_versions['compositional']:
        logger.info('Loading %s' % reader)
        if os.path.exists('compositional_dec2020_%s_raw.pkl' % reader):
            with open('compositional_dec2020_%s_raw.pkl' % reader, 'rb') as fh:
                stmts += pickle.load(fh)
    '''
    logger.info('Got a total of %s statements' % len(stmts))
    assembly_config_file = os.path.join(HERE, os.pardir, 'indra_wm_service',
                                        'resources',
                                        'assembly_compositional_phase3.json')
    pipeline = AssemblyPipeline.from_json_file(assembly_config_file)
    assembled_stmts = pipeline.run(stmts)

    num_docs = 472
    meta_data = {
        'corpus_id': corpus_id,
        'description': 'Compositional grounding assembly for the initial '
        'Phase 3 documents, Eidos only.',
        'display_name': 'Compositional grounding assembly Phase 3 (Eidos)',
        'readers': list(reader_versions.keys()),
        'assembly': {
            'level': 'grounding_location',
            'grounding_threshold': 0.6,
        },
        'num_statements': len(assembled_stmts),
        'num_documents': num_docs
Example #3
        ot = get_text(stmt.obj)
        if text_too_long(st, k) or text_too_long(ot, k):
            continue
        new_stmts.append(stmt)
    logger.info(f'{len(new_stmts)} statements after filter.')
    return new_stmts


if __name__ == '__main__':
    # Load all raw statements
    eidos_stmts = load_eidos()
    hume_stmts = load_hume()
    sofia_stmts = load_sofia()
    cwms_stmts = load_cwms()

    hume_ap = AssemblyPipeline.from_json_file('hume_redundant.json')
    hume_stmts = hume_ap.run(hume_stmts)

    # Reground where needed
    reground_ap = AssemblyPipeline.from_json_file('reground_stmts.json')
    sofia_stmts = reground_ap.run(sofia_stmts)
    cwms_stmts = reground_ap.run(cwms_stmts)

    # Run shared assembly steps
    stmts = eidos_stmts + hume_stmts + sofia_stmts + cwms_stmts
    ap = AssemblyPipeline.from_json_file('assembly_steps.json')
    stmts = ap.run(stmts)

    funs = {
        'grounding': None,
        'location': location_matches,
Example #4
import logging
import datetime
from indra_world.sources.dart import process_reader_output, DartClient
from indra_world.assembly.incremental_assembler import \
    IncrementalAssembler
from indra_world.resources import get_resource_file
from indra.pipeline import AssemblyPipeline
from indra_world.assembly.operations import *
from .db import DbManager

logger = logging.getLogger(__name__)

preparation_pipeline = AssemblyPipeline.from_json_file(
    get_resource_file('statement_preparation.json'))

expected_readers = {'eidos', 'hume', 'sofia'}


class ServiceController:
    def __init__(self, db_url, dart_client=None):
        self.db = DbManager(db_url)
        self.assemblers = {}
        self.assembly_triggers = {}
        if dart_client:
            self.dart_client = dart_client
        else:
            self.dart_client = DartClient(storage_mode='web')

    def new_project(self, project_id, name, corpus_id=None):
        res = self.db.add_project(project_id, name)
        if res is None:
Example #5
            for stmt in pp.statements:
                for ev in stmt.evidence:
                    if 'provenance' not in ev.annotations:
                        ev.annotations['provenance'] = [{
                            'document': {
                                '@id': doc_id
                            }
                        }]
                    else:
                        prov = ev.annotations['provenance'][0]['document']
                        prov['@id'] = doc_id
            stmts += pp.statements
        if grounding == 'compositional':
            validate_grounding_format(stmts)

    ap = AssemblyPipeline.from_json_file('assembly_%s.json' % grounding)
    assembled_stmts = ap.run(stmts)

    if do_upload:
        corpus_id = 'compositional_v4'
        stmts_to_json_file(assembled_stmts, '%s.json' % corpus_id)

        meta_data = {
            'corpus_id':
            corpus_id,
            'description': ('Assembly of 4 reader outputs with the '
                            'compositional ontology (%s).' % ont_url),
            'display_name':
            'Compositional ontology assembly v3',
            'readers':
            readers,