def _get_statements(self):
    """Download the CBN archive and return de-duplicated statements.

    The zip archive at ``self.archive_url`` is downloaded into a scratch
    directory and extracted. Every member whose name ends in ``.jgf`` or
    ``.jgif`` is run through ``process_cbn_jgif_file`` and the resulting
    statements are pooled. The pool is then split by ``extract_duplicates``
    using ``KeyFunc.mk_and_one_ev_src`` as the key function.

    The scratch directory is removed before returning, whether or not
    processing succeeded.

    Returns
    -------
    uniques
        The first element of the pair returned by ``extract_duplicates``.

    Raises
    ------
    requests.HTTPError
        If the archive download responds with an HTTP error status.
    """
    import shutil
    import tempfile
    from zipfile import ZipFile

    import requests
    from indra.sources.bel.api import process_cbn_jgif_file

    stmts = []
    # NOTE: the first positional argument of mkdtemp is the *suffix*.
    cbn_dir = tempfile.mkdtemp('cbn_manager')
    try:
        logger.info('Retrieving CBN network zip archive')
        tmp_zip = os.path.join(cbn_dir, 'cbn_human.zip')
        resp = requests.get(self.archive_url)
        # Fail here with a clear HTTP error instead of writing an error page
        # to disk and failing later when it is opened as a zip file.
        resp.raise_for_status()
        with open(tmp_zip, 'wb') as f:
            f.write(resp.content)

        tmp_dir = os.path.join(cbn_dir, 'cbn')
        os.mkdir(tmp_dir)
        with ZipFile(tmp_zip) as zipf:
            logger.info('Extracting archive to %s', tmp_dir)
            zipf.extractall(path=tmp_dir)
            logger.info('Processing jgif files')
            for jgif in zipf.namelist():
                if jgif.endswith(('.jgf', '.jgif')):
                    logger.info('Processing %s', jgif)
                    pbp = process_cbn_jgif_file(os.path.join(tmp_dir, jgif))
                    stmts += pbp.statements
    finally:
        # The statements are held in memory, so the downloaded archive and
        # the extracted files are no longer needed.
        shutil.rmtree(cbn_dir, ignore_errors=True)

    logger.info("Deduplicating...")
    uniques, dups = extract_duplicates(stmts,
                                       key_func=KeyFunc.mk_and_one_ev_src)
    # Report the duplicates through the logger rather than stdout.
    if dups:
        logger.info('\n'.join(str(dup) for dup in dups))
    logger.info('Found %d duplicates', len(dups))
    return uniques
def test_process_jgif():
    """Process a small CBN JGIF file and check the extracted statements.

    The fixture is downloaded into the current working directory, processed
    with ``process_cbn_jgif_file``, and removed again even if the download or
    the processing step raises.
    """
    test_file_url = 'https://s3.amazonaws.com/bigmech/travis/Hox-2.0-Hs.jgf'
    test_file = 'Hox-2.0-Hs.jgf'
    try:
        request.urlretrieve(url=test_file_url, filename=test_file)
        pbp = process_cbn_jgif_file(test_file)
    finally:
        # Clean up, including after a failed or partial download.
        if os.path.exists(test_file):
            os.remove(test_file)

    assert len(pbp.statements) == 26, len(pbp.statements)
    assert isinstance(pbp.statements[0], Statement)
    assert all(s.evidence[0].source_api == 'bel' for s in pbp.statements)
# NOTE(review): an identical definition of test_process_jgif appears directly
# before this one. If both live in the same module, this later definition
# shadows the earlier one -- consider removing one of them.
def test_process_jgif():
    """Process a small CBN JGIF file and check the extracted statements.

    The fixture is downloaded into the current working directory, processed
    with ``process_cbn_jgif_file``, and removed again even if the download or
    the processing step raises.
    """
    test_file_url = 'https://s3.amazonaws.com/bigmech/travis/Hox-2.0-Hs.jgf'
    test_file = 'Hox-2.0-Hs.jgf'
    try:
        request.urlretrieve(url=test_file_url, filename=test_file)
        pbp = process_cbn_jgif_file(test_file)
    finally:
        # Clean up, including after a failed or partial download.
        if os.path.exists(test_file):
            os.remove(test_file)

    assert len(pbp.statements) == 26, len(pbp.statements)
    assert isinstance(pbp.statements[0], Statement)
    assert all(s.evidence[0].source_api == 'bel' for s in pbp.statements)