def get_db(clear=True):
    "Set up the database for testing."
    db = get_test_db()
    db.grab_session()
    if clear:
        db._clear(force=True)
    return db
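
# A minimal usage sketch (a hypothetical test, not from the source),
# assuming the DatabaseManager returned by get_test_db exposes
# select_all and table attributes such as TextRef.
def test_db_starts_empty():
    db = get_db()
    assert not db.select_all(db.TextRef), "Database was not cleared."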
def __init__(self, max_total_stmts):
    self.test_db = dbu.get_test_db()
    self.test_db._clear(force=True)
    with open(os.path.join(THIS_DIR, 'db_pa_test_input_1M.pkl'), 'rb') as f:
        self.test_data = pickle.load(f)
    if max_total_stmts < len(self.test_data['raw_statements']['tuples']):
        self.stmt_tuples = random.sample(
            self.test_data['raw_statements']['tuples'], max_total_stmts)
    else:
        self.stmt_tuples = self.test_data['raw_statements']['tuples']
    self.used_stmt_tuples = set()
    return
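
# A standalone sketch of the capping pattern used in __init__ above:
# random.sample raises ValueError when asked for more items than the
# population contains, hence the explicit length check before sampling.
import random

def cap_sample(items, max_n):
    if max_n < len(items):
        return random.sample(items, max_n)
    return list(items)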
def _get_background_loaded_db():
    db = db_util.get_test_db()
    db._clear(force=True)
    # Get and load the provenance for the statements.
    print("\tloading background metadata...")
    db.copy('text_ref', _load_tuples('test_text_ref_tuples.pkl'),
            ('id', 'pmid', 'pmcid', 'doi'))
    # Pad with empty bytes: the content/bytes payloads aren't needed here.
    tc_tuples = [t + (b'',)
                 for t in _load_tuples('test_text_content_tuples.pkl')]
    db.copy('text_content', tc_tuples,
            ('id', 'text_ref_id', 'source', 'format', 'text_type',
             'content'))
    r_tuples = [t + (b'',) for t in _load_tuples('test_reading_tuples.pkl')]
    db.copy('reading', r_tuples,
            ('id', 'reader', 'reader_version', 'text_content_id', 'format',
             'bytes'))
    db.copy('db_info', _load_tuples('test_db_info_tuples.pkl'),
            ('id', 'db_name'))
    return db
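
# A plausible sketch of the _load_tuples helper used above (an
# assumption based on the pickle-loading pattern elsewhere in this test
# suite; not necessarily the actual source).
import os
import pickle

THIS_DIR = os.path.dirname(os.path.abspath(__file__))

def _load_tuples(fname):
    with open(os.path.join(THIS_DIR, fname), 'rb') as f:
        return pickle.load(f)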
def test_normal_db_reading_call():
    chdir(path.expanduser('~'))
    # Put some basic stuff in the test database.
    N = 6
    db = dbu.get_test_db()
    db._clear(force=True)
    db.copy('text_ref', [(i, 'PMID80945%d' % i) for i in range(N)],
            cols=('id', 'pmid'))
    text_content = [
        (i, i, 'pubmed', 'text', 'abstract',
         zip_string('MEK phosphorylates ERK in test %d.' % i))
        for i in range(N)
        ]
    text_content += [
        (N, N - 1, 'pmc_oa', 'text', 'fulltext',
         zip_string('MEK phosphorylates ERK. EGFR activates SHC.'))
        ]
    db.copy('text_content', text_content,
            cols=('id', 'text_ref_id', 'source', 'format', 'text_type',
                  'content'))

    # Put an id file on s3.
    basename = 'local_db_test_run'
    s3_prefix = 'reading_results/%s/' % basename
    s3.put_object(Bucket='bigmech', Key=s3_prefix + 'id_list',
                  Body='\n'.join(['tcid: %d' % i
                                  for i in range(len(text_content))]))

    # Call the reading tool.
    sub = srp.DbReadingSubmitter(basename, ['sparser'])
    job_name, cmd = sub._make_command(0, len(text_content))
    cmd += ['--test']
    check_call(cmd)
    sub.produce_report()

    # Remove garbage on s3.
    res = s3.list_objects(Bucket='bigmech', Prefix=s3_prefix)
    for entry in res['Contents']:
        print("Removing %s..." % entry['Key'])
        s3.delete_object(Bucket='bigmech', Key=entry['Key'])
    return
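
# A plausible sketch of the zip_string helper used above (an assumption:
# the real indra helper may differ in container format or encoding). It
# compresses a unicode string to bytes suitable for the binary
# text_content.content column.
import gzip

def zip_string(s, encoding='utf-8'):
    # gzip.compress returns bytes wrapped in the gzip container format.
    return gzip.compress(s.encode(encoding))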
from indra.db.client import get_content_by_refs
from indra.db.reading_manager import BulkLocalReadingManager
from .util import needs_py3, IS_PY3
if IS_PY3:
    from indra.db.content_manager import Pubmed, PmcOA, Manuscripts, \
        Elsevier

if '-a' in argv:
    attr_str = argv[argv.index('-a') + 1]
    if any(not_attr in attr_str
           for not_attr in ('!nonpublic', '!webservice')):
        raise SkipTest("All tests are nonpublic and use a webservice.")

try:
    get_test_db()
except Exception as e:
    raise SkipTest("Not able to start up any of the available test hosts:\n"
                   + str(e))

#==============================================================================
# The following are some helpful functions for the rest of the tests.
#==============================================================================
def assert_contents_equal(list1, list2, msg=None):
    "Check that the contents of two lists are the same, regardless of order."
    res = set(list1) == set(list2)
    err_msg = "Contents of lists do not match:\n%s\n%s\n" % (list1, list2)
    if msg is not None:
        err_msg += msg
    assert res, err_msg
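
# Illustrative usage of assert_contents_equal (hypothetical examples,
# not from the source): order is ignored, and duplicates collapse under
# the set comparison.
def test_assert_contents_equal_ignores_order():
    assert_contents_equal([1, 2, 3], [3, 1, 2])
    try:
        assert_contents_equal([1, 2], [1, 2, 4], "extra id present")
    except AssertionError:
        pass
    else:
        assert False, "Mismatched lists should have raised."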
logger.info("Making readings...") outputs = rdb.produce_readings({'trid': trids}, [reader_inst], read_mode='unread_unread', db=db, prioritize=True, verbose=self.verbose) logger.info("Made %d readings." % len(outputs)) logger.info("Making statements...") rdb.produce_statements(outputs, n_proc=self.n_proc, db=db) return if __name__ == '__main__': if args.test: db = get_test_db() else: db = get_primary_db() if args.method == 'local': bulk_managers = [ BulkLocalReadingManager(reader_name, buffer_days=args.buffer, n_proc=args.num_procs) for reader_name in ['SPARSER', 'REACH'] ] elif args.method == 'aws': bulk_managers = [ BulkAwsReadingManager(reader_name, buffer_days=args.buffer, project_name=args.project_name)
id_dict = get_id_dict([line.strip() for line in id_str_list])

# Some combinations of options don't make sense:
forbidden_combos = [('all', 'unread'), ('none', 'unread'), ('none', 'none')]
assert (args.read_mode, args.stmt_mode) not in forbidden_combos, \
    ("The combination of reading mode %s and statement mode %s is not "
     "allowed." % (args.read_mode, args.stmt_mode))

# Init some timing dicts
starts = {}
ends = {}

# Get a handle for the database
if args.test:
    from indra.db import util as dbu
    db = dbu.get_test_db()
else:
    db = None

s3_log_prefix = ('reading_results/%s/logs/run_db_reading_queue/%s/'
                 % (args.basename, args.job_name))

# Read everything ========================================
starts['reading'] = datetime.now()
outputs = produce_readings(id_dict, readers, verbose=True,
                           read_mode=args.read_mode,
                           get_preexisting=(args.stmt_mode == 'all'),
                           force_fulltext=args.force_fulltext,
                           prioritize=args.use_best_fulltext,
                           db=db)
ends['reading'] = datetime.now()
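
# A plausible sketch of get_id_dict (an assumption inferred from the
# 'tcid: <n>' lines written to the s3 id_list file in the test above;
# the real indra implementation may differ): group ids by type prefix.
def get_id_dict(id_lines):
    id_dict = {}
    for line in id_lines:
        id_type, id_val = (part.strip() for part in line.split(':', 1))
        id_dict.setdefault(id_type, []).append(int(id_val))
    return id_dict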