Exemplo n.º 1
0
    def _get_statements(self):
        import requests
        from zipfile import ZipFile
        from indra.sources.bel.api import process_cbn_jgif_file
        import tempfile

        cbn_dir = tempfile.mkdtemp('cbn_manager')

        logger.info('Retrieving CBN network zip archive')
        tmp_zip = os.path.join(cbn_dir, 'cbn_human.zip')
        resp = requests.get(self.archive_url)
        with open(tmp_zip, 'wb') as f:
            f.write(resp.content)

        stmts = []
        tmp_dir = os.path.join(cbn_dir, 'cbn')
        os.mkdir(tmp_dir)
        with ZipFile(tmp_zip) as zipf:
            logger.info('Extracting archive to %s' % tmp_dir)
            zipf.extractall(path=tmp_dir)
            logger.info('Processing jgif files')
            for jgif in zipf.namelist():
                if jgif.endswith('.jgf') or jgif.endswith('.jgif'):
                    logger.info('Processing %s' % jgif)
                    pbp = process_cbn_jgif_file(os.path.join(tmp_dir, jgif))
                    stmts += pbp.statements

        uniques, dups = extract_duplicates(stmts,
                                           key_func=KeyFunc.mk_and_one_ev_src)

        logger.info("Deduplicating...")
        print('\n'.join(str(dup) for dup in dups))
        print(len(dups))

        return uniques
Exemplo n.º 2
0
def test_process_jgif():
    test_file_url = 'https://s3.amazonaws.com/bigmech/travis/Hox-2.0-Hs.jgf'
    test_file = 'Hox-2.0-Hs.jgf'
    request.urlretrieve(url=test_file_url, filename=test_file)
    pbp = process_cbn_jgif_file(test_file)

    # Clean up
    os.remove(test_file)

    assert len(pbp.statements) == 26, len(pbp.statements)
    assert isinstance(pbp.statements[0], Statement)
    assert all(s.evidence[0].source_api == 'bel' for s in pbp.statements)
Exemplo n.º 3
0
def test_process_jgif():
    test_file_url = 'https://s3.amazonaws.com/bigmech/travis/Hox-2.0-Hs.jgf'
    test_file = 'Hox-2.0-Hs.jgf'
    request.urlretrieve(url=test_file_url, filename=test_file)
    pbp = process_cbn_jgif_file(test_file)

    # Clean up
    os.remove(test_file)

    assert len(pbp.statements) == 26, len(pbp.statements)
    assert isinstance(pbp.statements[0], Statement)
    assert all(s.evidence[0].source_api == 'bel' for s in pbp.statements)