def test_translator_list(self):
    expected = URIRef('http://example.org/trans1')
    with connect(p(self.testdir, '.pow', 'pow.conf')) as conn:
        with transaction.manager:
            # Create data sources
            ctx = Context(ident='http://example.org/context', conf=conn.conf)

            class DT(DataTranslator):
                class_context = ctx.identifier
                translator_identifier = expected

                def translate(source):
                    pass

            ctx.mapper.process_class(DT)
            DT.definition_context.save(conn.conf['rdf.graph'])
            # Create a translator
            dt = ctx(DT)()

            ctx_id = conn.conf['data_context_id']
            main_ctx = Context(ident=ctx_id, conf=conn.conf)
            main_ctx.add_import(ctx)
            main_ctx.save_imports()
            ctx.save()

    # List translators
    assertRegexpMatches(
        self.sh('pow translator list'),
        re.compile('^' + expected.n3() + '$', flags=re.MULTILINE))
def test_translate_data_source_loader(self):
    with connect(p(self.testdir, '.pow', 'pow.conf')) as conn:
        with transaction.manager:
            # Create data sources
            ctx = Context(ident='http://example.org/context', conf=conn.conf)
            ctx(LFDS)(
                ident='http://example.org/lfds',
                file_name='Merged_Nuclei_Stained_Worm.zip',
                torrent_file_name='d9da5ce947c6f1c127dfcdc2ede63320.torrent')

            class DT(DataTranslator):
                class_context = ctx.identifier
                input_type = LFDS
                output_type = LFDS
                translator_identifier = 'http://example.org/trans1'

                def translate(source):
                    print(source.full_path())
                    return source

            ctx.mapper.process_class(DT)
            dt = ctx(DT)()
            # Create a translator
            ctx_id = conn.conf['data_context_id']
            DT.definition_context.save(conn.conf['rdf.graph'])
            main_ctx = Context(ident=ctx_id, conf=conn.conf)
            main_ctx.add_import(ctx)
            main_ctx.save_imports()
            ctx.save()

    # Do translation
    assertRegexpMatches(
        self.sh('pow translate http://example.org/trans1 http://example.org/lfds'),
        r'Merged_Nuclei_Stained_Worm.zip')
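# The sh helper used by the two CLI tests above is part of the test harness
# and is not shown in this excerpt. A minimal sketch (hypothetical
# implementation, not the project's actual helper), assuming it runs the
# `pow` command from the test working directory and returns its stdout so the
# regex assertions above have something to match against:
import subprocess


def sh(self, command):
    # Run from self.testdir so `pow` picks up .pow/pow.conf created there,
    # then return decoded stdout for the assertions.
    return subprocess.check_output(command, shell=True,
                                   cwd=self.testdir).decode('utf-8')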
def test_save_import(self):
    ctx0 = Context(ident='http://example.com/context_0')
    ctx = Context(ident='http://example.com/context_1')
    new_ctx = Context(ident='http://example.com/context_1')
    ctx.add_import(new_ctx)
    ctx.save_imports(ctx0)
    self.assertEqual(len(ctx0), 1)
def test_add_import(self):
    ctx0 = Context(ident='http://example.com/context_0')
    ctx = Context(ident='http://example.com/context_1')
    ctx2 = Context(ident='http://example.com/context_2')
    ctx2_1 = Context(ident='http://example.com/context_2_1')
    ctx.add_import(ctx2)
    ctx.add_import(ctx2_1)
    ctx3 = Context(ident='http://example.com/context_3')
    ctx3.add_import(ctx)
    final_ctx = Context(ident='http://example.com/context_1', imported=(ctx3,))
    final_ctx.save_imports(ctx0)
    self.assertEqual(len(ctx0), 4)
def test_triples_saved(self):
    graph = set()
    ident_uri = 'http://example.com/context_1'
    ident_uri2 = 'http://example.com/context_2'
    ident_uri2_1 = 'http://example.com/context_2_1'
    ident_uri3 = 'http://example.com/context_3'
    ident_uri4 = 'http://example.com/context_4'
    ctx = Context(ident=ident_uri)
    ctx2 = Context(ident=ident_uri2)
    ctx2_1 = Context(ident=ident_uri2_1)
    ctx.add_import(ctx2)
    ctx.add_import(ctx2_1)
    ctx3 = Context(ident=ident_uri3)
    ctx3.add_import(ctx)
    last_ctx = Context(ident=ident_uri4)
    last_ctx.add_import(ctx3)
    ctx.add_statement(create_mock_statement(ident_uri, 1))
    ctx2.add_statement(create_mock_statement(ident_uri2, 2))
    ctx2_1.add_statement(create_mock_statement(ident_uri2_1, 2.1))
    ctx3.add_statement(create_mock_statement(ident_uri3, 3))
    last_ctx.add_statement(create_mock_statement(ident_uri4, 4))
    last_ctx.save_context(graph, True)
    self.assertEqual(last_ctx.triples_saved, 5)
def test_triples_saved_multi(self):
    graph = set()
    ident_uri = 'http://example.com/context_1'
    ident_uri1 = 'http://example.com/context_11'
    ident_uri2 = 'http://example.com/context_12'
    ctx = Context(ident=ident_uri)
    ctx1 = Context(ident=ident_uri1)
    ctx2 = Context(ident=ident_uri2)
    ctx2.add_import(ctx)
    ctx1.add_import(ctx2)
    ctx1.add_import(ctx)
    ctx.add_statement(create_mock_statement(ident_uri, 1))
    ctx1.add_statement(create_mock_statement(ident_uri1, 3))
    ctx2.add_statement(create_mock_statement(ident_uri2, 2))
    ctx1.save_context(graph, inline_imports=True)
    self.assertEqual(ctx1.triples_saved, 3)
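# The create_mock_statement helper used by the two tests above is not shown
# in this excerpt. A minimal sketch (an assumption, not necessarily the
# project's own helper), assuming save_context reads each statement's triple
# via to_triple() and attributes it to statement.context.identifier, so every
# mock contributes exactly one distinct triple to triples_saved: five distinct
# contexts give 5 in test_triples_saved, and three contexts give 3 in
# test_triples_saved_multi even though the import graph visits some of them
# more than once.
from unittest.mock import MagicMock

from rdflib.term import URIRef


def create_mock_statement(ident_uri, stmt_id):
    statement = MagicMock(name='statement')
    statement.context.identifier = URIRef(ident_uri)
    statement.to_triple.return_value = (True, stmt_id, -stmt_id)
    return statement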
def do_insert(ident, config="default.conf", logging=False,
              imports_context_ident=None, basedir=aux_data()):
    sources = init_sources()
    extras = init_extra_sources(basedir)
    data_sources_by_key = {x.key: x for x in sources + extras}
    trans_map = init_translators() + init_extra_neuron_data_translators(extras)
    P.connect(configFile=config, do_logging=logging)
    P.config()

    CTX = Context(ident=ident + '-data', imported=(P.CONTEXT,), conf=P.config())
    EVCTX = Context(ident=ident + '-evidence', imported=(P.CONTEXT,), conf=P.config())
    IWCTX = Context(ident=ident, imported=(CTX, EVCTX), conf=P.config())
    imports_context = Context(ident=imports_context_ident, conf=P.config())

    try:
        t0 = time()
        translators = dict()
        remaining = list(trans_map)
        last_remaining = None
        saved_contexts = set([])
        while remaining != last_remaining:
            next_remaining = []
            for t in remaining:
                if not isinstance(t[0], (list, tuple)):
                    source_keys = (t[0],)
                else:
                    source_keys = t[0]

                sources = tuple(data_sources_by_key.get(s) for s in source_keys)
                if None in sources:
                    next_remaining.append(t)
                    continue
                translator_class = t[1]
                if len(t) > 2:
                    output_key = t[2]
                else:
                    output_key = None
                translator = translators.get(translator_class, None)
                if not translator:
                    translator = translator_class()
                    translators[translator_class] = translator
                print('\n'.join('Input({}/{}): {}'.format(i + 1, len(sources), s)
                                for i, s in enumerate(sources)))
                print('Translating with {}'.format(translator))
                orig_wd = os.getcwd()
                os.chdir(basedir)
                try:
                    res = translator(*sources, output_key=output_key)
                finally:
                    os.chdir(orig_wd)

                print('Result: {}'.format(res))
                if isinstance(res, DataWithEvidenceDataSource):
                    res.data_context.save_context(inline_imports=True,
                                                  saved_contexts=saved_contexts)
                    res.data_context.save_imports(imports_context)
                    res.evidence_context.save_context(inline_imports=True,
                                                      saved_contexts=saved_contexts)
                    res.evidence_context.save_imports(imports_context)
                    for ctx in res.contexts:
                        raise Exception()

                if res:
                    if res.key:
                        data_sources_by_key[res.key] = res
                    else:
                        data_sources_by_key[res.identifier] = res
            last_remaining = list(remaining)
            remaining = next_remaining
        for x in remaining:
            warn("Failed to process: {}".format(x))

        # attach_neuromlfiles_to_channel()

        t1 = time()
        print("Saving data...")
        graph = P.config('rdf.graph')
        for src in data_sources_by_key.values():
            if isinstance(src, DataWithEvidenceDataSource):
                print('saving', src)
                CTX.add_import(src.data_context)
                EVCTX.add_import(src.evidence_context)
                for ctx in src.contexts:
                    IWCTX.add_import(ctx)
        IWCTX.save_context(graph, saved_contexts=saved_contexts)
        IWCTX.save_imports(imports_context)
        print('imports context size', len(imports_context))
        print("Saved %d triples." % IWCTX.triples_saved)
        t2 = time()
        print("Serializing...")
        serialize_as_nquads()
        t3 = time()
        print("generating objects took", t1 - t0, "seconds")
        print("saving objects took", t2 - t1, "seconds")
        print("serializing objects took", t3 - t2, "seconds")
    except Exception:
        traceback.print_exc()
    finally:
        P.disconnect()
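# A minimal usage sketch for do_insert; the identifiers below are
# placeholders for illustration, not values from the original project.
if __name__ == '__main__':
    do_insert('http://example.org/worm-data',
              config='default.conf',
              imports_context_ident='http://example.org/worm-data-imports')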