def insert_db_stmts(db, stmts, db_ref_id, verbose=False):
    """Insert statements, their database, and any affiliated agents.

    Note that this method is for uploading statements that came from a
    database to our database, not for inserting any statements to the
    database.

    Parameters
    ----------
    db : :py:class:`DatabaseManager`
        The manager for the database into which you are loading statements.
    stmts : list [:py:class:`indra.statements.Statement`]
        A list of un-assembled indra statements to be uploaded to the
        database.
    db_ref_id : int
        The id to the db_ref entry corresponding to these statements.
    verbose : bool
        If True, print extra information and a status bar while compiling
        statements for insert. Default False.
    """
    # Preparing the statements for copying
    stmt_data = []
    cols = ('uuid', 'db_ref', 'type', 'json', 'indra_version')
    # The version is loop-invariant; look it up once rather than per
    # statement.
    indra_version = get_version()
    # Aim for ~25 progress ticks; clamp the divisor to 1 so fewer than 25
    # statements does not cause a ZeroDivisionError.
    tick = max(len(stmts) // 25, 1)
    if verbose:
        print("Loading:", end='', flush=True)
    for i, stmt in enumerate(stmts):
        stmt_rec = (
            stmt.uuid,
            db_ref_id,
            stmt.__class__.__name__,
            json.dumps(stmt.to_json()).encode('utf8'),
            indra_version
        )
        stmt_data.append(stmt_rec)
        if verbose and i % tick == 0:
            print('|', end='', flush=True)
    if verbose:
        print(" Done loading %d statements." % len(stmts))
    db.copy('statements', stmt_data, cols)
    insert_agents(db, db.Statements, db.Agents,
                  db.Statements.db_ref == db_ref_id)
    return
def insert_pa_stmts(db, stmts, verbose=False, do_copy=True):
    """Insert pre-assembled statements, and any affiliated agents.

    Parameters
    ----------
    db : :py:class:`DatabaseManager`
        The manager for the database into which you are loading
        pre-assembled statements.
    stmts : list [:py:class:`indra.statements.Statement`]
        A list of pre-assembled indra statements to be uploaded to the
        database. (Must support ``len``, so not a bare generator.)
    verbose : bool
        If True, print extra information and a status bar while compiling
        statements for insert. Default False.
    do_copy : bool
        If True (default), use pgcopy for the insert; otherwise use
        ``insert_many``.
    """
    logger.info("Beginning to insert pre-assembled statements.")
    stmt_data = []
    indra_version = get_version()
    cols = ('uuid', 'mk_hash', 'type', 'json', 'indra_version')
    # Print ~25 progress ticks; clamp the divisor so small batches
    # (< 25 statements) do not trigger a ZeroDivisionError.
    tick = max(len(stmts) // 25, 1)
    if verbose:
        print("Loading:", end='', flush=True)
    for i, stmt in enumerate(stmts):
        stmt_rec = (
            stmt.uuid,
            stmt.get_hash(shallow=True),
            stmt.__class__.__name__,
            json.dumps(stmt.to_json()).encode('utf8'),
            indra_version
        )
        stmt_data.append(stmt_rec)
        if verbose and i % tick == 0:
            print('|', end='', flush=True)
    if verbose:
        print(" Done loading %d statements." % len(stmts))
    if do_copy:
        db.copy('pa_statements', stmt_data, cols)
    else:
        db.insert_many('pa_statements', stmt_data, cols=cols)
    insert_agents(db, 'pa', verbose=verbose)
    return
def simple_insert_stmts(db, stmts_dict):
    """Insert raw statements from readings into the database.

    `stmts_dict` must be of the form {<source_type>: {<source_id>: [stmts]}}
    where `source_type` is "reading" or "databases", and source_id would be
    a reading ID or a db_info_id, respectively.
    """
    batch_id = db.make_copy_batch_id()

    stmt_data = []
    cols = ('uuid', 'mk_hash', 'db_info_id', 'reading_id', 'type', 'json',
            'batch_id', 'source_hash', 'indra_version')
    # The version is the same for every statement; look it up once instead
    # of once per statement inside the triple-nested loop.
    indra_version = get_version()
    all_stmts = []
    for category, stmts in stmts_dict.items():
        for src_id, stmt_list in stmts.items():
            for stmt in stmt_list:
                stmt_info = {
                    'uuid': stmt.uuid,
                    'mk_hash': stmt.get_hash(refresh=True),
                    'type': stmt.__class__.__name__,
                    'json': json.dumps(stmt.to_json()).encode('utf-8'),
                    'batch_id': batch_id,
                    # source_hash is a placeholder here; readings supply a
                    # real one elsewhere.
                    'source_hash': -1,
                    'indra_version': indra_version
                }
                # Attach the source id to the column matching the category;
                # the other id column is left as None by dict.get below.
                if category == 'reading':
                    stmt_info['reading_id'] = src_id
                else:
                    stmt_info['db_info_id'] = src_id
                stmt_data.append(tuple(stmt_info.get(col) for col in cols))
                all_stmts.append(stmt)

    db.copy('raw_statements', stmt_data, cols)
    insert_raw_agents(db, batch_id, all_stmts)
def insert_pa_stmts(db, stmts, verbose=False, do_copy=True,
                    ignore_agents=False, commit=True):
    """Insert pre-assembled statements, and any affiliated agents.

    Parameters
    ----------
    db : :py:class:`DatabaseManager`
        The manager for the database into which you are loading
        pre-assembled statements.
    stmts : list [:py:class:`indra.statements.Statement`]
        A list of pre-assembled indra statements to be uploaded to the
        database. (Must support ``len``, so not a bare generator.)
    verbose : bool
        If True, print extra information and a status bar while compiling
        statements for insert. Default False.
    do_copy : bool
        If True (default), use pgcopy to quickly insert the agents.
    ignore_agents : bool
        If False (default), add agents to the database. If True, then
        agent insertion is skipped.
    commit : bool
        If True (default), commit the result immediately. Otherwise the
        results are not committed (thus allowing multiple related
        insertions to be neatly rolled back upon failure.)
    """
    logger.info("Beginning to insert pre-assembled statements.")
    stmt_data = []
    indra_version = get_version()
    cols = ('uuid', 'matches_key', 'mk_hash', 'type', 'json',
            'indra_version')
    activity_rows = []
    # Print ~25 progress ticks; clamp the divisor so small batches
    # (< 25 statements) do not trigger a ZeroDivisionError.
    tick = max(len(stmts) // 25, 1)
    if verbose:
        print("Loading:", end='', flush=True)
    for i, stmt in enumerate(stmts):
        # Compute the shallow hash once; it is reused for the activity row.
        mk_hash = stmt.get_hash(shallow=True)
        stmt_rec = (
            stmt.uuid,
            stmt.matches_key(),
            mk_hash,
            stmt.__class__.__name__,
            json.dumps(stmt.to_json()).encode('utf8'),
            indra_version
        )
        stmt_data.append(stmt_rec)

        # ActiveForm statements also get a row in the pa_activity table.
        if isinstance(stmt, ActiveForm):
            activity_rows.append((mk_hash, stmt.activity, stmt.is_active))

        if verbose and i % tick == 0:
            print('|', end='', flush=True)
    if verbose:
        print(" Done loading %d statements." % len(stmts))
    if do_copy:
        # The two copies are committed together (or not at all) so the
        # statement and activity tables stay in sync.
        db.copy('pa_statements', stmt_data, cols, commit=False)
        db.copy('pa_activity', activity_rows,
                ('stmt_mk_hash', 'activity', 'is_active'), commit=commit)
    else:
        db.insert_many('pa_statements', stmt_data, cols=cols)
    if not ignore_agents:
        insert_pa_agents(db, stmts, verbose=verbose, commit=commit)
    return
def insert_db_stmts(db, stmts, db_ref_id, verbose=False):
    """Insert statements, their database, and any affiliated agents.

    Note that this method is for uploading statements that came from a
    database to our database, not for inserting any statements to the
    database.

    Parameters
    ----------
    db : :py:class:`DatabaseManager`
        The manager for the database into which you are loading statements.
    stmts : list [:py:class:`indra.statements.Statement`]
        A list of un-assembled indra statements to be uploaded to the
        database.
    db_ref_id : int
        The id to the db_ref entry corresponding to these statements.
    verbose : bool
        If True, print extra information and a status bar while compiling
        statements for insert. Default False.
    """
    # Preparing the statements for copying
    stmt_data = []
    cols = ('uuid', 'mk_hash', 'source_hash', 'db_info_id', 'type',
            'json', 'indra_version')
    # The version is loop-invariant; look it up once.
    indra_version = get_version()
    # Print ~25 progress ticks; clamp the divisor so fewer than 25
    # statements does not cause a ZeroDivisionError.
    tick = max(len(stmts) // 25, 1)
    if verbose:
        print("Loading:", end='', flush=True)
    for i, stmt in enumerate(stmts):
        # Only one evidence is allowed for each statement, so fan each
        # statement out into one copy per evidence object.
        for ev in stmt.evidence:
            new_stmt = stmt.make_generic_copy()
            new_stmt.evidence.append(ev)
            stmt_rec = (
                new_stmt.uuid,
                new_stmt.get_hash(shallow=False),
                new_stmt.evidence[0].get_source_hash(),
                db_ref_id,
                new_stmt.__class__.__name__,
                json.dumps(new_stmt.to_json()).encode('utf8'),
                indra_version
            )
            stmt_data.append(stmt_rec)
        if verbose and i % tick == 0:
            print('|', end='', flush=True)
    if verbose:
        print(" Done loading %d statements." % len(stmts))
    db.copy('raw_statements', stmt_data, cols)
    stmts_to_add_agents, num_stmts = \
        get_statements_without_agents(db, 'raw',
                                      db.RawStatements.db_info_id == db_ref_id)
    insert_agents(db, 'raw', stmts_to_add_agents)
    return
def insert_db_stmts(db, stmts, db_ref_id):
    """Insert statements, their database, and any affiliated agents.

    Note that this method is for uploading statements that came from a
    database to our database, not for inserting any statements to the
    database.

    Parameters
    ----------
    db : :py:class:`DatabaseManager`
        The manager for the database into which you are loading statements.
    stmts : list [:py:class:`indra.statements.Statement`]
        A list of un-assembled indra statements to be uploaded.
    db_ref_id : int
        The id to the db_ref entry corresponding to these statements.
    """
    # Preparing the statements for copying
    cols = ('uuid', 'db_ref', 'type', 'json', 'indra_version')
    # The version is the same for every record; look it up once rather
    # than once per statement.
    indra_version = get_version()
    stmt_data = [
        (stmt.uuid,
         db_ref_id,
         stmt.__class__.__name__,
         json.dumps(stmt.to_json()).encode('utf8'),
         indra_version)
        for stmt in stmts
    ]
    db.copy('statements', stmt_data, cols)
    db.insert_agents(stmts, db.Statements.db_ref == db_ref_id)
    return
def insert_db_stmts(db, stmts, db_ref_id, verbose=False, batch_id=None,
                    lazy=False):
    """Insert statements, their database, and any affiliated agents.

    Note that this method is for uploading statements that came from a
    database to our database, not for inserting any statements to the
    database.

    Parameters
    ----------
    db : :py:class:`DatabaseManager`
        The manager for the database into which you are loading statements.
    stmts : list [:py:class:`indra.statements.Statement`]
        (Cannot be a generator) A list of un-assembled indra statements,
        each with EXACTLY one evidence and no exact duplicates, to be
        uploaded to the database.
    db_ref_id : int
        The id to the db_ref entry corresponding to these statements.
    verbose : bool
        If True, print extra information and a status bar while compiling
        statements for insert. Default False.
    batch_id : int or None
        Select a batch id to use for this upload. It can be used to trace
        what content has been added.
    """
    # Preparing the statements for copying
    if batch_id is None:
        batch_id = db.make_copy_batch_id()

    stmt_data = []
    cols = ('uuid', 'mk_hash', 'source_hash', 'db_info_id', 'type', 'json',
            'indra_version', 'batch_id')
    # The version is loop-invariant; look it up once.
    indra_version = get_version()
    # Print ~25 progress ticks; clamp the divisor so fewer than 25
    # statements does not cause a ZeroDivisionError.
    tick = max(len(stmts) // 25, 1)
    if verbose:
        print("Loading:", end='', flush=True)
    for i, stmt in enumerate(stmts):
        assert len(stmt.evidence) == 1, \
            'Statement with %s evidence.' % len(stmt.evidence)
        stmt_rec = (
            stmt.uuid,
            stmt.get_hash(refresh=True),
            stmt.evidence[0].get_source_hash(refresh=True),
            db_ref_id,
            stmt.__class__.__name__,
            json.dumps(stmt.to_json()).encode('utf8'),
            indra_version,
            batch_id
        )
        stmt_data.append(stmt_rec)
        if verbose and i % tick == 0:
            print('|', end='', flush=True)
    if verbose:
        print(" Done preparing %d statements." % len(stmts))
    try:
        # TODO: Make it possible to not commit this immediately. That would
        # require developing a more sophisticated copy procedure for raw
        # statements and agents.
        db.copy_push('raw_statements', stmt_data, cols)
    except Exception:
        # Dump the prepared rows for post-mortem debugging, then re-raise
        # with the original traceback intact.
        with open('stmt_data_dump.pkl', 'wb') as f:
            pickle.dump(stmt_data, f)
        raise
    insert_raw_agents(db, batch_id, stmts)
    return