Пример #1
0
def insert_db_stmts(db, stmts, db_ref_id, verbose=False):
    """Insert statements, their database reference, and any affiliated agents.

    Note that this method is for uploading statements that came from a
    database to our database, not for inserting any statements to the
    database.

    Parameters
    ----------
    db : :py:class:`DatabaseManager`
        The manager for the database into which you are loading statements.
    stmts : list [:py:class:`indra.statements.Statement`]
        A list of un-assembled indra statements to be uploaded to the
        database. When `verbose` is True, `len(stmts)` is used, so it must be
        a sized collection rather than a generator.
    db_ref_id : int
        The id to the db_ref entry corresponding to these statements.
    verbose : bool
        If True, print extra information and a status bar while compiling
        statements for insert. Default False.
    """
    cols = ('uuid', 'db_ref', 'type', 'json', 'indra_version')

    # The version is identical for every row, so look it up only once.
    indra_version = get_version()

    # Aim for roughly 25 ticks in the status bar. The step is clamped to at
    # least 1 because `len(stmts) // 25` is 0 for fewer than 25 statements,
    # which would make the modulo in the loop raise ZeroDivisionError.
    tick_every = max(len(stmts) // 25, 1) if verbose else None

    # Preparing the statements for copying
    stmt_data = []
    if verbose:
        print("Loading:", end='', flush=True)
    for i, stmt in enumerate(stmts):
        stmt_rec = (stmt.uuid, db_ref_id, stmt.__class__.__name__,
                    json.dumps(stmt.to_json()).encode('utf8'), indra_version)
        stmt_data.append(stmt_rec)
        if verbose and i % tick_every == 0:
            print('|', end='', flush=True)
    if verbose:
        print(" Done loading %d statements." % len(stmts))

    # Write the statement rows first, then add agents for every statement
    # carrying this db_ref.
    db.copy('statements', stmt_data, cols)
    insert_agents(db, db.Statements, db.Agents,
                  db.Statements.db_ref == db_ref_id)
    return
Пример #2
0
def insert_pa_stmts(db, stmts, verbose=False, do_copy=True):
    """Insert pre-assembled statements, and any affiliated agents.

    Parameters
    ----------
    db : :py:class:`DatabaseManager`
        The manager for the database into which you are loading pre-assembled
        statements.
    stmts : iterable [:py:class:`indra.statements.Statement`]
        The pre-assembled indra statements to be uploaded to the database.
        When `verbose` is True, `len(stmts)` is used, so in that case it must
        be a sized collection rather than a generator.
    verbose : bool
        If True, print extra information and a status bar while compiling
        statements for insert. Default False.
    do_copy : bool
        If True (default), the rows are written with `db.copy`; otherwise
        they are written with `db.insert_many`.
    """
    logger.info("Beginning to insert pre-assembled statements.")
    indra_version = get_version()
    cols = ('uuid', 'mk_hash', 'type', 'json', 'indra_version')

    # Aim for roughly 25 ticks in the status bar. The step is clamped to at
    # least 1 because `len(stmts) // 25` is 0 for fewer than 25 statements,
    # which would make the modulo in the loop raise ZeroDivisionError.
    tick_every = max(len(stmts) // 25, 1) if verbose else None

    stmt_data = []
    if verbose:
        print("Loading:", end='', flush=True)
    for i, stmt in enumerate(stmts):
        stmt_rec = (stmt.uuid,
                    stmt.get_hash(shallow=True), stmt.__class__.__name__,
                    json.dumps(stmt.to_json()).encode('utf8'), indra_version)
        stmt_data.append(stmt_rec)
        if verbose and i % tick_every == 0:
            print('|', end='', flush=True)
    if verbose:
        print(" Done loading %d statements." % len(stmts))

    if do_copy:
        db.copy('pa_statements', stmt_data, cols)
    else:
        db.insert_many('pa_statements', stmt_data, cols=cols)
    insert_agents(db, 'pa', verbose=verbose)
    return
Пример #3
0
def simple_insert_stmts(db, stmts_dict):
    """Copy raw statements into `raw_statements` and insert their agents.

    `stmts_dict` must be of the form {<source_type>: {<source_id>: [stmts]}}
    where `source_type` is "reading" or "databases", and source_id would be a
    reading ID or a db_info_id, respectively. Any source type other than
    "reading" has its source id stored in the `db_info_id` column.

    Every row is written with a `source_hash` of -1 and shares one batch id
    obtained from `db.make_copy_batch_id()`; that same batch id is handed to
    `insert_raw_agents` along with the flat list of statements.
    """
    columns = ('uuid', 'mk_hash', 'db_info_id', 'reading_id', 'type', 'json',
               'batch_id', 'source_hash', 'indra_version')
    batch_id = db.make_copy_batch_id()

    rows = []
    flat_stmts = []
    for source_type, by_source in stmts_dict.items():
        # Only one of the two id columns is filled in; the other stays None.
        id_column = 'reading_id' if source_type == 'reading' else 'db_info_id'
        for source_id, source_stmts in by_source.items():
            for stmt in source_stmts:
                # Start with every column set to None, then fill in values.
                record = dict.fromkeys(columns)
                record.update(
                    uuid=stmt.uuid,
                    mk_hash=stmt.get_hash(refresh=True),
                    type=stmt.__class__.__name__,
                    json=json.dumps(stmt.to_json()).encode('utf-8'),
                    batch_id=batch_id,
                    source_hash=-1,
                    indra_version=get_version(),
                )
                record[id_column] = source_id
                rows.append(tuple(record[name] for name in columns))
                flat_stmts.append(stmt)

    db.copy('raw_statements', rows, columns)
    insert_raw_agents(db, batch_id, flat_stmts)
Пример #4
0
def insert_pa_stmts(db,
                    stmts,
                    verbose=False,
                    do_copy=True,
                    ignore_agents=False,
                    commit=True):
    """Insert pre-assembled statements, and any affiliated agents.

    Parameters
    ----------
    db : :py:class:`DatabaseManager`
        The manager for the database into which you are loading pre-assembled
        statements.
    stmts : iterable [:py:class:`indra.statements.Statement`]
        The pre-assembled indra statements to be uploaded to the database.
        When `verbose` is True, `len(stmts)` is used, and unless
        `ignore_agents` is True the same object is passed on to
        `insert_pa_agents` after being iterated here, so a one-shot
        generator is not suitable in those cases.
    verbose : bool
        If True, print extra information and a status bar while compiling
        statements for insert. Default False.
    do_copy : bool
        If True (default), write the rows with `db.copy`; otherwise write
        the statement rows with `db.insert_many`.
    ignore_agents : bool
        If False (default), add agents to the database. If True, then agent
        insertion is skipped.
    commit : bool
        If True (default), commit the result immediately. Otherwise the results
        are not committed (thus allowing multiple related insertions to be
        neatly rolled back upon failure.)
    """
    logger.info("Beginning to insert pre-assembled statements.")
    indra_version = get_version()
    cols = ('uuid', 'matches_key', 'mk_hash', 'type', 'json', 'indra_version')

    # Aim for roughly 25 ticks in the status bar. The step is clamped to at
    # least 1 because `len(stmts) // 25` is 0 for fewer than 25 statements,
    # which would make the modulo in the loop raise ZeroDivisionError.
    tick_every = max(len(stmts) // 25, 1) if verbose else None

    stmt_data = []
    activity_rows = []
    if verbose:
        print("Loading:", end='', flush=True)
    for i, stmt in enumerate(stmts):
        matches_key = stmt.matches_key()
        # The shallow hash keys both the statement row and, for ActiveForm
        # statements, the activity row, so compute it once per statement.
        mk_hash = stmt.get_hash(shallow=True)
        stmt_rec = (stmt.uuid, matches_key, mk_hash, stmt.__class__.__name__,
                    json.dumps(stmt.to_json()).encode('utf8'), indra_version)
        stmt_data.append(stmt_rec)

        if isinstance(stmt, ActiveForm):
            activity_rows.append((mk_hash, stmt.activity, stmt.is_active))
        if verbose and i % tick_every == 0:
            print('|', end='', flush=True)
    if verbose:
        print(" Done loading %d statements." % len(stmts))

    if do_copy:
        # The statement copy is left uncommitted so that it and the activity
        # copy are committed (or not) together by the second call.
        db.copy('pa_statements', stmt_data, cols, commit=False)
        db.copy('pa_activity',
                activity_rows, ('stmt_mk_hash', 'activity', 'is_active'),
                commit=commit)
    else:
        # NOTE(review): this branch does not write `activity_rows` and does
        # not pass `commit` along -- confirm whether that is intentional.
        db.insert_many('pa_statements', stmt_data, cols=cols)
    if not ignore_agents:
        insert_pa_agents(db, stmts, verbose=verbose, commit=commit)
    return
Пример #5
0
def insert_db_stmts(db, stmts, db_ref_id, verbose=False):
    """Insert statements, their database reference, and any affiliated agents.

    Note that this method is for uploading statements that came from a
    database to our database, not for inserting any statements to the
    database.

    Each input statement is expanded into one row per evidence, because only
    one evidence is allowed for each stored raw statement.

    Parameters
    ----------
    db : :py:class:`DatabaseManager`
        The manager for the database into which you are loading statements.
    stmts : list [:py:class:`indra.statements.Statement`]
        A list of un-assembled indra statements to be uploaded to the
        database. When `verbose` is True, `len(stmts)` is used, so it must be
        a sized collection rather than a generator.
    db_ref_id : int
        The id to the db_ref entry corresponding to these statements. It is
        stored in the `db_info_id` column.
    verbose : bool
        If True, print extra information and a status bar while compiling
        statements for insert. Default False.
    """
    cols = ('uuid', 'mk_hash', 'source_hash', 'db_info_id', 'type', 'json',
            'indra_version')

    # The version is identical for every row, so look it up only once.
    indra_version = get_version()

    # Aim for roughly 25 ticks in the status bar. The step is clamped to at
    # least 1 because `len(stmts) // 25` is 0 for fewer than 25 statements,
    # which would make the modulo in the loop raise ZeroDivisionError.
    tick_every = max(len(stmts) // 25, 1) if verbose else None

    # Preparing the statements for copying
    stmt_data = []
    if verbose:
        print("Loading:", end='', flush=True)
    for i, stmt in enumerate(stmts):
        # Only one evidence is allowed for each statement.
        for ev in stmt.evidence:
            # Presumably the generic copy starts with no evidence, so after
            # the append it holds exactly this one -- TODO confirm.
            new_stmt = stmt.make_generic_copy()
            new_stmt.evidence.append(ev)
            stmt_rec = (
                new_stmt.uuid,
                new_stmt.get_hash(shallow=False),
                new_stmt.evidence[0].get_source_hash(),
                db_ref_id,
                new_stmt.__class__.__name__,
                json.dumps(new_stmt.to_json()).encode('utf8'),
                indra_version
            )
            stmt_data.append(stmt_rec)
        if verbose and i % tick_every == 0:
            print('|', end='', flush=True)
    if verbose:
        # This counts input statements, not the per-evidence rows produced.
        print(" Done loading %d statements." % len(stmts))

    db.copy('raw_statements', stmt_data, cols)

    # Agents are added for the raw statements from this source that the
    # lookup reports as not having agents yet.
    stmts_to_add_agents, num_stmts = \
        get_statements_without_agents(db, 'raw',
                                      db.RawStatements.db_info_id == db_ref_id)
    insert_agents(db, 'raw', stmts_to_add_agents)
    return
Пример #6
0
def insert_db_stmts(db, stmts, db_ref_id):
    """Upload statements taken from a source database, then add their agents.

    This is meant for statements that originate from a database, not as a
    general-purpose way of inserting statements.

    Parameters
    ----------
    db : :py:class:`DatabaseManager`
        The manager for the database receiving the statements.
    stmts : list [:py:class:`indra.statements.Statement`]
        The statements to upload. The same object is passed on to
        `db.insert_agents` after the rows have been copied.
    db_ref_id : int
        The value stored in the `db_ref` column of every row, and used to
        build the filter passed to `db.insert_agents`.
    """
    columns = ('uuid', 'db_ref', 'type', 'json', 'indra_version')

    # One row per statement, laid out in the same order as `columns`.
    rows = [
        (
            stmt.uuid,
            db_ref_id,
            stmt.__class__.__name__,
            json.dumps(stmt.to_json()).encode('utf8'),
            get_version(),
        )
        for stmt in stmts
    ]
    db.copy('statements', rows, columns)

    ref_filter = db.Statements.db_ref == db_ref_id
    db.insert_agents(stmts, ref_filter)
Пример #7
0
def insert_db_stmts(db,
                    stmts,
                    db_ref_id,
                    verbose=False,
                    batch_id=None,
                    lazy=False):
    """Insert statements, their database reference, and any affiliated agents.

    Note that this method is for uploading statements that came from a
    database to our database, not for inserting any statements to the
    database.

    Parameters
    ----------
    db : :py:class:`DatabaseManager`
        The manager for the database into which you are loading statements.
    stmts : list [:py:class:`indra.statements.Statement`]
        (Cannot be a generator) A list of un-assembled indra statements, each
        with EXACTLY one evidence and no exact duplicates, to be uploaded to
        the database.
    db_ref_id : int
        The id to the db_ref entry corresponding to these statements. It is
        stored in the `db_info_id` column.
    verbose : bool
        If True, print extra information and a status bar while compiling
        statements for insert. Default False.
    batch_id : int or None
        Select a batch id to use for this upload. It can be used to trace what
        content has been added. If None (default), a new one is requested
        from `db.make_copy_batch_id()`.
    lazy : bool
        Accepted for interface compatibility; it is not used anywhere in this
        function.

    Raises
    ------
    AssertionError
        If a statement does not have exactly one evidence (only when
        assertions are enabled, i.e. not under ``python -O``).
    Exception
        Any error raised by `db.copy_push` is re-raised after the prepared
        rows have been pickled to ``stmt_data_dump.pkl``.
    """
    # Preparing the statements for copying
    if batch_id is None:
        batch_id = db.make_copy_batch_id()

    cols = ('uuid', 'mk_hash', 'source_hash', 'db_info_id', 'type', 'json',
            'indra_version', 'batch_id')

    # The version is identical for every row, so look it up only once.
    indra_version = get_version()

    # Aim for roughly 25 ticks in the status bar. The step is clamped to at
    # least 1 because `len(stmts) // 25` is 0 for fewer than 25 statements,
    # which would make the modulo in the loop raise ZeroDivisionError.
    tick_every = max(len(stmts) // 25, 1) if verbose else None

    stmt_data = []
    if verbose:
        print("Loading:", end='', flush=True)

    for i, stmt in enumerate(stmts):
        # Kept as an assert so callers see the same exception type as before.
        assert len(stmt.evidence) == 1, \
            'Statement with %s evidence.' % len(stmt.evidence)

        stmt_rec = (stmt.uuid, stmt.get_hash(refresh=True),
                    stmt.evidence[0].get_source_hash(refresh=True),
                    db_ref_id, stmt.__class__.__name__,
                    json.dumps(stmt.to_json()).encode('utf8'), indra_version,
                    batch_id)

        stmt_data.append(stmt_rec)

        if verbose and i % tick_every == 0:
            print('|', end='', flush=True)

    if verbose:
        print(" Done preparing %d statements." % len(stmts))

    try:
        # TODO: Make it possible to not commit this immediately. That would
        # require developing a more sophisticated copy procedure for raw
        # statements and agents.
        db.copy_push('raw_statements', stmt_data, cols)
    except Exception:
        # Save the prepared rows to a file in the working directory so the
        # failed upload can be inspected, then re-raise the original error
        # with its traceback intact.
        with open('stmt_data_dump.pkl', 'wb') as f:
            pickle.dump(stmt_data, f)
        raise
    insert_raw_agents(db, batch_id, stmts)
    return