def test_convert(self):
    """Round-trip the fixture knowledge base through ``io.convert`` twice.

    The KB is written to ``core_1.xlsx``, converted to the ``core_2-*.csv``
    path and then to ``core_3.xlsx``. After each conversion the result is
    read back and compared with ``self.kb``, and the converted sequence
    file is compared byte-for-byte with the file it was converted from.
    """
    def in_dir(filename):
        return os.path.join(self.dir, filename)

    core_paths = [in_dir('core_1.xlsx'), in_dir('core_2-*.csv'), in_dir('core_3.xlsx')]
    seq_paths = [in_dir('seq_1.fna'), in_dir('seq_2.fna'), in_dir('seq_3.fna')]

    io.Writer().run(core_paths[0], self.kb, seq_path=seq_paths[0], data_repo_metadata=False)
    self.assertTrue(filecmp.cmp(seq_paths[0], self.seq_path, shallow=False))

    # Each step converts file set i into file set i + 1 and verifies the output.
    for step in range(2):
        src_core, src_seq = core_paths[step], seq_paths[step]
        dst_core, dst_seq = core_paths[step + 1], seq_paths[step + 1]

        io.convert(src_core, src_seq, dst_core, dst_seq)
        reloaded = io.Reader().run(dst_core, seq_path=self.seq_path)[core.KnowledgeBase][0]
        self.assertTrue(reloaded.is_equal(self.kb))
        self.assertTrue(filecmp.cmp(src_seq, dst_seq, shallow=False))
def test_convert_sloppy(self):
    """Check ``io.convert`` on a workbook whose ``!!KB`` rows were reordered.

    After rows 4 and 5 of the ``!!KB`` worksheet are swapped, conversion
    must raise ``ValueError``. With the environment variable
    ``CONFIG__DOT__wc_kb__DOT__io__DOT__strict`` set to ``'0'`` the same
    conversion must succeed, and the output must round-trip to XLSX again.
    """
    def in_dir(filename):
        return os.path.join(self.dir, filename)

    core_xlsx_a = in_dir('core_1.xlsx')
    core_csv = in_dir('core_2-*.csv')
    core_xlsx_b = in_dir('core_3.xlsx')
    seq_a = in_dir('seq_1.fna')
    seq_b = in_dir('seq_2.fna')
    seq_c = in_dir('seq_3.fna')

    io.Writer().run(core_xlsx_a, self.kb, seq_path=seq_a, data_repo_metadata=False)
    self.assertTrue(filecmp.cmp(seq_a, self.seq_path, shallow=False))

    # pop(4) followed by insert(5, ...) swaps rows 4 and 5 of the sheet.
    workbook = wc_utils.workbook.io.read(core_xlsx_a)
    kb_sheet = workbook['!!KB']
    kb_sheet.insert(5, kb_sheet.pop(4))
    wc_utils.workbook.io.write(core_xlsx_a, workbook)

    with self.assertRaisesRegex(ValueError, "cannot be loaded because"):
        io.convert(core_xlsx_a, seq_a, core_csv, seq_b)

    relaxed_env = EnvironmentVarGuard()
    relaxed_env.set('CONFIG__DOT__wc_kb__DOT__io__DOT__strict', '0')
    # NOTE(review): only the conversion of the reordered workbook is kept
    # inside the guard; confirm this matches the intended scope.
    with relaxed_env:
        io.convert(core_xlsx_a, seq_a, core_csv, seq_b)
    reloaded = io.Reader().run(core_csv, seq_path=self.seq_path)[core.KnowledgeBase][0]
    self.assertTrue(reloaded.is_equal(self.kb))
    self.assertTrue(filecmp.cmp(seq_a, seq_b, shallow=False))

    io.convert(core_csv, seq_b, core_xlsx_b, seq_c)
    reloaded = io.Reader().run(core_xlsx_b, seq_path=self.seq_path)[core.KnowledgeBase][0]
    self.assertTrue(reloaded.is_equal(self.kb))
    self.assertTrue(filecmp.cmp(seq_b, seq_c, shallow=False))
def test_normalize(self):
    """Run the ``normalize`` command in place and with explicit destinations.

    In both cases the knowledge base read back from the output files must
    be equal to the one that was originally written.
    """
    src_core = path.join(self.tempdir, 'model-1.xlsx')
    src_seq = path.join(self.tempdir, 'seq-1.fna')
    dst_core = path.join(self.tempdir, 'model-2.xlsx')
    dst_seq = path.join(self.tempdir, 'seq-2.fna')

    kb = wc_kb.KnowledgeBase(id='kb', name='KB', version='0.0.1a', wc_kb_version='0.0.0')
    io.Writer().run(kb, src_core, src_seq, set_repo_metadata_from_path=False)

    # (command line, files expected to hold the normalized output)
    scenarios = (
        # with same dest
        (['normalize', src_core, src_seq], src_core, src_seq),
        # with different dest
        (
            ['normalize', src_core, src_seq, '--dest-core', dst_core, '--dest-seq', dst_seq],
            dst_core,
            dst_seq,
        ),
    )
    for argv, out_core, out_seq in scenarios:
        with __main__.App(argv=argv) as app:
            app.run()
        normalized = io.Reader().run(out_core, out_seq)
        self.assertTrue(normalized.is_equal(kb))
def test_convert_sloppy(self):
    """Check ``io.convert`` on a workbook whose first two rows were swapped.

    With rows 0 and 1 of the ``Knowledge base`` worksheet swapped, a
    default conversion must raise ``ValueError``; the same conversion with
    ``strict=False`` must succeed and the result must round-trip to XLSX.
    """
    def in_dir(filename):
        return os.path.join(self.dir, filename)

    core_xlsx_a = in_dir('core_1.xlsx')
    core_csv = in_dir('core_2-*.csv')
    core_xlsx_b = in_dir('core_3.xlsx')
    seq_a = in_dir('seq_1.fna')
    seq_b = in_dir('seq_2.fna')
    seq_c = in_dir('seq_3.fna')

    io.Writer().run(self.kb, core_xlsx_a, seq_a, set_repo_metadata_from_path=False)
    self.assertTrue(filecmp.cmp(seq_a, self.seq_path, shallow=False))

    # pop(0) followed by insert(1, ...) swaps the first two rows.
    workbook = wc_utils.workbook.io.read(core_xlsx_a)
    kb_sheet = workbook['Knowledge base']
    kb_sheet.insert(1, kb_sheet.pop(0))
    wc_utils.workbook.io.write(core_xlsx_a, workbook)

    with self.assertRaisesRegex(ValueError, "The columns of worksheet 'Knowledge base' must be defined in this order"):
        io.convert(core_xlsx_a, seq_a, core_csv, seq_b)

    io.convert(core_xlsx_a, seq_a, core_csv, seq_b, strict=False)
    reloaded = io.Reader().run(core_csv, self.seq_path)
    self.assertTrue(reloaded.is_equal(self.kb))
    self.assertTrue(filecmp.cmp(seq_a, seq_b, shallow=False))

    io.convert(core_csv, seq_b, core_xlsx_b, seq_c)
    reloaded = io.Reader().run(core_xlsx_b, self.seq_path)
    self.assertTrue(reloaded.is_equal(self.kb))
    self.assertTrue(filecmp.cmp(seq_b, seq_c, shallow=False))
def _default(self):
    """Print the difference between two knowledge bases.

    When ``compare_files`` is set, the two core files are read as raw
    workbooks and, unless ``compare_metadata_in_files`` is set, passed
    through ``self.remove_metadata`` first. Otherwise both knowledge bases
    are loaded with ``io.Reader``. A fixed message is printed when the
    difference is empty.
    """
    args = self.app.pargs

    if not args.compare_files:
        first = io.Reader().run(args.core_path_1, seq_path=args.seq_path_1,
                                taxon=args.taxon)[core.KnowledgeBase][0]
        second = io.Reader().run(args.core_path_2, seq_path=args.seq_path_2,
                                 taxon=args.taxon)[core.KnowledgeBase][0]
    else:
        first = wc_utils.workbook.io.read(args.core_path_1)
        second = wc_utils.workbook.io.read(args.core_path_2)
        if not args.compare_metadata_in_files:
            for workbook in (first, second):
                self.remove_metadata(workbook)

    diff = first.difference(second)
    print(diff if diff else 'Knowledge bases are identical')
def test_reader_no_kb(self):
    """Check ``Reader`` on workbooks that do not contain a knowledge base.

    An empty workbook must be read as ``None``; a workbook holding only a
    ``Cell`` must raise ``ValueError`` ('cannot contain instances').
    """
    workbook_path = os.path.join(self.dir, 'core.xlsx')
    fasta_path = os.path.join(self.dir, 'test_seq.fna')

    obj_model.io.WorkbookWriter().run(
        workbook_path, [], io.PROKARYOTE_MODEL_ORDER, include_all_attributes=False)

    # Create (or truncate to) an empty sequence file.
    with open(fasta_path, 'w'):
        pass

    loaded = io.Reader().run(workbook_path, fasta_path)
    self.assertEqual(loaded, None)

    obj_model.io.WorkbookWriter().run(
        workbook_path, [core.Cell(id='cell')], io.PROKARYOTE_MODEL_ORDER,
        include_all_attributes=False)
    with self.assertRaisesRegex(ValueError, 'cannot contain instances'):
        io.Reader().run(workbook_path, fasta_path)
def _default(self):
    """Validate a knowledge base and report the result.

    Prints a confirmation when ``io.Reader`` loads the files without a
    ``ValueError``.

    Raises:
        SystemExit: with the reader's error message when a ``ValueError``
            is raised while loading.
    """
    options = self.app.pargs
    try:
        io.Reader().run(options.core_path, options.seq_path, strict=options.strict)
        print('Knowledge base is valid')
    except ValueError as err:
        message = 'Knowledge base is invalid: ' + str(err)
        raise SystemExit(message)
def test_read_write_eukaryote(self):
    """Read the eukaryote fixture, write it out, and compare the round trip.

    The re-read knowledge base must equal the one loaded from the
    fixtures, and the written sequence file must match the fixture
    sequence file byte-for-byte.
    """
    fixture_dir = os.path.join(os.path.dirname(__file__), 'fixtures')
    fixture_core = os.path.join(fixture_dir, 'eukaryote_core.xlsx')
    fixture_seq = os.path.join(fixture_dir, 'eukaryote_seq.fna')
    out_core = os.path.join(self.dir, 'tmp_eukaryote_core.xlsx')
    out_seq = os.path.join(self.dir, 'tmp_eukaryote_seq.fna')

    reader = io.Reader()
    original = reader.run(
        fixture_core, seq_path=fixture_seq, taxon='eukaryote',
        rewrite_seq_path=False)[core.KnowledgeBase][0]

    io.Writer().run(
        out_core, original, seq_path=out_seq, taxon='eukaryote',
        data_repo_metadata=False)

    # The copy is re-read against the fixture sequence file, not the copy.
    round_tripped = reader.run(out_core, fixture_seq, taxon='eukaryote')[core.KnowledgeBase][0]

    self.assertTrue(original.is_equal(round_tripped))
    self.assertTrue(filecmp.cmp(out_seq, fixture_seq, shallow=False))
def test_write_read_sloppy(self):
    """Check ``Reader`` on a workbook whose ``!!KB`` rows were reordered.

    With rows 4 and 5 of the ``!!KB`` worksheet swapped, reading must
    raise ``ValueError``. With the environment variable
    ``CONFIG__DOT__wc_kb__DOT__io__DOT__strict`` set to ``'0'`` the read
    must succeed and yield a knowledge base equal to ``self.kb``.
    """
    workbook_path = os.path.join(self.dir, 'core.xlsx')
    written_seq = os.path.join(self.dir, 'test_seq.fna')

    io.Writer().run(workbook_path, self.kb, seq_path=written_seq, data_repo_metadata=False)

    # pop(4) followed by insert(5, ...) swaps rows 4 and 5 of the sheet.
    workbook = wc_utils.workbook.io.read(workbook_path)
    kb_sheet = workbook['!!KB']
    kb_sheet.insert(5, kb_sheet.pop(4))
    wc_utils.workbook.io.write(workbook_path, workbook)

    reader = io.Reader()
    with self.assertRaisesRegex(ValueError, "cannot be loaded because"):
        reader.run(workbook_path, seq_path=self.seq_path)

    relaxed_env = EnvironmentVarGuard()
    relaxed_env.set('CONFIG__DOT__wc_kb__DOT__io__DOT__strict', '0')
    # NOTE(review): only the read of the reordered workbook is kept inside
    # the guard; confirm this matches the intended scope.
    with relaxed_env:
        loaded = reader.run(workbook_path, self.seq_path)[core.KnowledgeBase][0]

    self.assertTrue(loaded.is_equal(self.kb))
    self.assertTrue(filecmp.cmp(self.seq_path, written_seq, shallow=False))
def test_write_with_repo_metadata(self):
    """Check that repository metadata written by ``Writer`` can be read back.

    A test GitHub repository is created in a temporary directory, the
    fixture knowledge base is written into it with
    ``data_repo_metadata=True`` and read back with ``read_metadata=True``.
    The ``utils.DataRepoMetadata`` record that is read must have a GitHub
    URL, the ``main`` branch and a 40-character revision.

    The test repository is deleted even when a write, read or assertion
    fails, so failed runs do not leave it behind.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        # create temp git repo & write file into it
        test_repo_name = 'test_wc_kb_test_io'
        test_github_repo = GitHubRepoForTests(test_repo_name)
        test_github_repo.make_test_repo(temp_dir)

        try:
            # Reserve unique file names inside the repo. Unlike a bare
            # mkstemp() call, the context manager closes the handle, so no
            # file descriptor is leaked; delete=False keeps the file.
            with tempfile.NamedTemporaryFile(
                    dir=temp_dir, suffix='.xlsx', delete=False) as core_file:
                core_path = core_file.name
            with tempfile.NamedTemporaryFile(
                    dir=temp_dir, suffix='.fna', delete=False) as seq_file:
                seq_path = seq_file.name

            # write data repo metadata in data_file
            writer = io.Writer()
            writer.run(core_path, self.kb, seq_path=seq_path, data_repo_metadata=True)

            # deliberately read metadata
            reader = io.Reader()
            objs_read = reader.run(core_path, seq_path=seq_path, read_metadata=True)
            data_repo_metadata = objs_read[utils.DataRepoMetadata][0]
            self.assertTrue(
                data_repo_metadata.url.startswith('https://github.com/'))
            self.assertEqual(data_repo_metadata.branch, 'main')
            self.assertEqual(len(data_repo_metadata.revision), 40)
        finally:
            # delete test repo
            test_github_repo.delete_test_repo()
def _default(self):
    """Stamp a knowledge base with the current ``wc_kb`` version.

    The knowledge base is read from ``path_core``/``path_seq``, its
    ``wc_kb_version`` is set to ``wc_kb.__version__`` and it is written
    back to the same two paths.
    """
    options = self.app.pargs
    core_path, seq_path = options.path_core, options.path_seq

    knowledge_base = io.Reader().run(core_path, seq_path, strict=options.strict)
    knowledge_base.wc_kb_version = wc_kb.__version__

    io.Writer().run(
        knowledge_base,
        core_path,
        seq_path,
        set_repo_metadata_from_path=options.set_repo_metadata_from_path,
    )
def test_rewrite_seq_path_in_read_write(self):
    """Check how ``rewrite_seq_path`` affects equality with the fixture KB.

    For both a default write and a write with ``rewrite_seq_path=True``,
    a default read must differ from ``self.kb`` while a read with
    ``rewrite_seq_path=False`` must equal it. The written sequence files
    must match the fixture sequence file.
    """
    def in_dir(filename):
        return os.path.join(self.dir, filename)

    def read(core_file, seq_file, **options):
        return io.Reader().run(core_file, seq_file, **options)

    core_a, core_b = in_dir('core_1.xlsx'), in_dir('core_2.xlsx')
    seq_a, seq_b = in_dir('seq_1.fna'), in_dir('seq_2.fna')

    # Default write, read back against the sequence file just written.
    io.Writer().run(self.kb, core_a, seq_a, set_repo_metadata_from_path=False)
    default_read = read(core_a, seq_a)
    verbatim_read = read(core_a, seq_a, rewrite_seq_path=False)
    self.assertFalse(default_read.is_equal(self.kb))
    self.assertTrue(verbatim_read.is_equal(self.kb))
    self.assertTrue(filecmp.cmp(seq_a, self.seq_path, shallow=False))

    # Write with rewrite_seq_path=True, read back against the fixture file.
    io.Writer().run(self.kb, core_b, seq_b, rewrite_seq_path=True,
                    set_repo_metadata_from_path=False)
    default_read = read(core_b, self.seq_path)
    verbatim_read = read(core_b, self.seq_path, rewrite_seq_path=False)
    self.assertFalse(default_read.is_equal(self.kb))
    self.assertTrue(verbatim_read.is_equal(self.kb))
    self.assertTrue(filecmp.cmp(seq_b, self.seq_path, shallow=False))
def test_reader_no_kb(self):
    """Check that ``Reader`` rejects workbooks without a knowledge base.

    Both an empty workbook and a workbook holding only a ``Cell`` must
    raise ``ValueError`` ('should define one knowledge base').
    """
    workbook_path = os.path.join(self.dir, 'core.xlsx')
    fasta_path = os.path.join(self.dir, 'test_seq.fna')

    def write_workbook(objects):
        obj_tables.io.WorkbookWriter().run(
            workbook_path, objects, models=io.PROKARYOTE_MODELS,
            include_all_attributes=False)

    def assert_rejected():
        with self.assertRaisesRegex(ValueError, 'should define one knowledge base'):
            io.Reader().run(workbook_path, seq_path=fasta_path)

    write_workbook([])
    # Create (or truncate to) an empty sequence file.
    with open(fasta_path, 'w'):
        pass
    assert_rejected()

    write_workbook([core.Cell(id='cell')])
    assert_rejected()
def _default(self):
    """Print the difference between two knowledge bases.

    When ``compare_files`` is set, the two core files are compared as raw
    workbooks; otherwise both knowledge bases are loaded with
    ``io.Reader`` and compared. A fixed message is printed when the
    difference is empty.
    """
    options = self.app.pargs

    if not options.compare_files:
        first = io.Reader().run(options.core_path_1, options.seq_path_1, strict=options.strict)
        second = io.Reader().run(options.core_path_2, options.seq_path_2, strict=options.strict)
    else:
        first = wc_utils.workbook.io.read(options.core_path_1)
        second = wc_utils.workbook.io.read(options.core_path_2)

    diff = first.difference(second)
    print(diff if diff else 'Knowledge bases are identical')
def _default(self):
    """Stamp a knowledge base with the current ``wc_kb`` version.

    The knowledge base is read from ``path_core``/``path_seq``, its
    ``wc_kb_version`` is set to ``wc_kb.__version__`` and it is written
    back to the same paths, protected unless ``unprotected`` is set.
    """
    options = self.app.pargs

    loaded = io.Reader().run(
        options.path_core, seq_path=options.path_seq, taxon=options.taxon)
    knowledge_base = loaded[core.KnowledgeBase][0]
    knowledge_base.wc_kb_version = wc_kb.__version__

    io.Writer().run(
        options.path_core,
        knowledge_base,
        seq_path=options.path_seq,
        taxon=options.taxon,
        data_repo_metadata=options.data_repo_metadata,
        protected=(not options.unprotected),
    )
def test_reader_error_no_cell(self):
    """Check that a knowledge base plus a DNA species type, with no cell, is rejected.

    Reading the workbook must raise ``ValueError`` ('cannot contain
    instances').
    """
    knowledge_base = core.KnowledgeBase(id='kb', name='kb1', version='0.0.1')
    chromosome = core.DnaSpeciesType(id='chr')

    workbook_path = os.path.join(self.dir, 'core.xlsx')
    fasta_path = os.path.join(self.dir, 'test_seq.fna')

    obj_model.io.WorkbookWriter().run(
        workbook_path, [knowledge_base, chromosome], io.PROKARYOTE_MODEL_ORDER,
        include_all_attributes=False)

    # Create (or truncate to) an empty sequence file.
    with open(fasta_path, 'w'):
        pass

    with self.assertRaisesRegex(ValueError, 'cannot contain instances'):
        io.Reader().run(workbook_path, fasta_path)
def test_reader_error_multiple_kbs(self):
    """Check that a workbook with two knowledge bases is rejected.

    Reading the workbook must raise ``ValueError`` (' should define one
    knowledge base').
    """
    first_kb = core.KnowledgeBase(id='kb1', name='kb1', version='0.0.1')
    second_kb = core.KnowledgeBase(id='kb2', name='kb2', version='0.0.1')

    workbook_path = os.path.join(self.dir, 'core.xlsx')
    fasta_path = os.path.join(self.dir, 'test_seq.fna')

    obj_model.io.WorkbookWriter().run(
        workbook_path, [first_kb, second_kb], io.PROKARYOTE_MODEL_ORDER,
        include_all_attributes=False)

    # Create (or truncate to) an empty sequence file.
    with open(fasta_path, 'w'):
        pass

    with self.assertRaisesRegex(ValueError, ' should define one knowledge base'):
        io.Reader().run(workbook_path, fasta_path)
def test_reader_no_cell(self):
    """Check that a knowledge base plus a DNA species type, with no cell, can be read.

    The test has no assertions; it passes as long as the read does not
    raise.
    """
    knowledge_base = core.KnowledgeBase(id='kb', name='kb1', version='0.0.1')
    chromosome = core.DnaSpeciesType(id='chr')

    workbook_path = os.path.join(self.dir, 'core.xlsx')
    fasta_path = os.path.join(self.dir, 'test_seq.fna')

    obj_tables.io.WorkbookWriter().run(
        workbook_path, [knowledge_base, chromosome], models=io.PROKARYOTE_MODELS,
        include_all_attributes=False)

    # Create (or truncate to) an empty sequence file.
    with open(fasta_path, 'w'):
        pass

    io.Reader().run(workbook_path, seq_path=fasta_path)
def test_write_read(self):
    """Write the fixture KB, read it back, and write the copy again.

    The knowledge base that was read must equal ``self.kb`` and the
    sequence file produced by the second write must match the fixture
    sequence file byte-for-byte.
    """
    first_core = os.path.join(self.dir, 'core.xlsx')
    second_core = os.path.join(self.dir, 'core2.xlsx')
    second_seq = os.path.join(self.dir, 'seq2.fna')

    writer = io.Writer()
    # The first write passes no sequence path; the read uses the fixture file.
    writer.run(self.kb, first_core, set_repo_metadata_from_path=False)
    reloaded = io.Reader().run(first_core, self.seq_path)

    writer.run(reloaded, second_core, second_seq, set_repo_metadata_from_path=False)

    self.assertTrue(self.kb.is_equal(reloaded))
    self.assertTrue(filecmp.cmp(self.seq_path, second_seq, shallow=False))
def _default(self):
    """Normalize a knowledge base by reading it and writing it back out.

    The output goes to ``dest_core``/``dest_seq`` when either one is set,
    otherwise it overwrites ``source_core``/``source_seq``.
    """
    options = self.app.pargs
    knowledge_base = io.Reader().run(
        options.source_core, options.source_seq, strict=options.strict)

    # NOTE(review): when only one destination is given, the other is passed
    # to the writer unset rather than falling back to the source path --
    # confirm that this is intended.
    if not (options.dest_core or options.dest_seq):
        out_core, out_seq = options.source_core, options.source_seq
    else:
        out_core, out_seq = options.dest_core, options.dest_seq

    io.Writer().run(knowledge_base, out_core, out_seq, set_repo_metadata_from_path=False)
def test_rewrite_seq_path_in_read_write(self):
    """Check how ``rewrite_seq_path`` affects equality with the fixture KB.

    The fixture KB is written once with default options and once with
    ``rewrite_seq_path=False``. Each workbook is read four ways (written
    or fixture sequence file, with and without ``rewrite_seq_path=False``)
    and each result is compared with ``self.kb``. The written sequence
    files must match the fixture sequence file.
    """
    def in_dir(filename):
        return os.path.join(self.dir, filename)

    def read(core_file, seq_file, **options):
        objects = io.Reader().run(core_file, seq_path=seq_file, **options)
        return objects[core.KnowledgeBase][0]

    core_a, core_b = in_dir('core_1.xlsx'), in_dir('core_2.xlsx')
    seq_a, seq_b = in_dir('seq_1.fna'), in_dir('seq_2.fna')

    # Workbook written with the default rewrite_seq_path.
    io.Writer().run(core_a, self.kb, seq_path=seq_a, data_repo_metadata=False)
    own_default = read(core_a, seq_a)
    own_verbatim = read(core_a, seq_a, rewrite_seq_path=False)
    fixture_default = read(core_a, self.seq_path)
    fixture_verbatim = read(core_a, self.seq_path, rewrite_seq_path=False)

    self.assertFalse(own_default.is_equal(self.kb))
    self.assertFalse(own_verbatim.is_equal(self.kb))
    self.assertTrue(fixture_default.is_equal(self.kb))
    self.assertFalse(fixture_verbatim.is_equal(self.kb))
    self.assertTrue(filecmp.cmp(seq_a, self.seq_path, shallow=False))

    # Workbook written with rewrite_seq_path=False.
    io.Writer().run(core_b, self.kb, seq_path=seq_b, rewrite_seq_path=False,
                    data_repo_metadata=False)
    own_default = read(core_b, seq_b)
    own_verbatim = read(core_b, seq_b, rewrite_seq_path=False)
    fixture_default = read(core_b, self.seq_path)
    fixture_verbatim = read(core_b, self.seq_path, rewrite_seq_path=False)

    self.assertFalse(own_default.is_equal(self.kb))
    self.assertTrue(own_verbatim.is_equal(self.kb))
    self.assertTrue(fixture_default.is_equal(self.kb))
    self.assertTrue(fixture_verbatim.is_equal(self.kb))
    self.assertTrue(filecmp.cmp(seq_b, self.seq_path, shallow=False))
def test_write_read(self):
    """Write the fixture KB, read it back, and write the copy again.

    The knowledge base that was read must equal ``self.kb`` and the
    sequence file produced by the second write must match the fixture
    sequence file byte-for-byte.
    """
    first_core = os.path.join(self.dir, 'core.xlsx')
    second_core = os.path.join(self.dir, 'core2.xlsx')
    second_seq = os.path.join(self.dir, 'seq2.fna')

    writer = io.Writer()
    # The first write passes no sequence path; the read uses the fixture file.
    writer.run(first_core, self.kb, data_repo_metadata=False)
    reloaded = io.Reader().run(first_core, seq_path=self.seq_path)[core.KnowledgeBase][0]

    writer.run(second_core, reloaded, second_seq, data_repo_metadata=False)

    self.assertTrue(self.kb.is_equal(reloaded))
    self.assertTrue(filecmp.cmp(self.seq_path, second_seq, shallow=False))
def test_read_flat_list_of_objects(self):
    """Check the two container shapes ``Reader.run`` can return.

    A default read must return a ``dict``; with
    ``group_objects_by_model=False`` it must return a ``list`` that
    contains a knowledge base equal to ``self.kb``.
    """
    workbook_path = os.path.join(self.dir, 'core.xlsx')
    io.Writer().run(workbook_path, self.kb, data_repo_metadata=False)

    reader = io.Reader()
    grouped = reader.run(workbook_path, seq_path=self.seq_path)
    self.assertIsInstance(grouped, dict)

    flat = reader.run(workbook_path, seq_path=self.seq_path, group_objects_by_model=False)
    self.assertIsInstance(flat, list)

    # Take the first knowledge base found in the flat list.
    knowledge_bases = (item for item in flat if isinstance(item, core.KnowledgeBase))
    self.assertTrue(next(knowledge_bases).is_equal(self.kb))
def test_reader_error_multiple_cells(self):
    """Check that a workbook with one knowledge base and two cells is rejected.

    Reading the workbook must raise ``ValueError`` (' should define zero
    or one cells').
    """
    knowledge_base = core.KnowledgeBase(id='kb', name='kb1', version='0.0.1')
    first_cell = core.Cell(id='cell1', name='cell1')
    second_cell = core.Cell(id='cell2', name='cell2')

    workbook_path = os.path.join(self.dir, 'core.xlsx')
    fasta_path = os.path.join(self.dir, 'test_seq.fna')

    obj_tables.io.WorkbookWriter().run(
        workbook_path, [knowledge_base, first_cell, second_cell],
        models=io.PROKARYOTE_MODELS, include_all_attributes=False)

    # Create (or truncate to) an empty sequence file.
    with open(fasta_path, 'w'):
        pass

    with self.assertRaisesRegex(ValueError, ' should define zero or one cells'):
        io.Reader().run(workbook_path, seq_path=fasta_path)
def test_read_write_eukaryote(self):
    """Read the eukaryote fixture, write it out, and compare the round trip.

    All reads and the write pass ``schema=False``. The re-read knowledge
    base must equal the one loaded from the fixtures, and the written
    sequence file must match the fixture sequence file byte-for-byte.
    """
    fixture_dir = os.path.join(os.path.dirname(__file__), 'fixtures')
    fixture_core = os.path.join(fixture_dir, 'eukaryote_core.xlsx')
    fixture_seq = os.path.join(fixture_dir, 'eukaryote_seq.fna')
    out_core = os.path.join(self.dir, 'tmp_eukaryote_core.xlsx')
    out_seq = os.path.join(self.dir, 'tmp_eukaryote_seq.fna')

    reader = io.Reader()
    original = reader.run(fixture_core, fixture_seq, schema=False)

    io.Writer().run(original, out_core, out_seq, schema=False,
                    set_repo_metadata_from_path=False)

    # The copy is re-read against the fixture sequence file, not the copy.
    round_tripped = reader.run(out_core, fixture_seq, schema=False)

    self.assertTrue(original.is_equal(round_tripped))
    self.assertTrue(filecmp.cmp(out_seq, fixture_seq, shallow=False))
def test_write_read_sloppy(self):
    """Check ``Reader`` on a workbook whose first two rows were swapped.

    With rows 0 and 1 of the ``Knowledge base`` worksheet swapped, a
    default read must raise ``ValueError``; a read with ``strict=False``
    must succeed and yield a knowledge base equal to ``self.kb``.
    """
    workbook_path = os.path.join(self.dir, 'core.xlsx')
    written_seq = os.path.join(self.dir, 'test_seq.fna')

    io.Writer().run(self.kb, workbook_path, written_seq, set_repo_metadata_from_path=False)

    # pop(0) followed by insert(1, ...) swaps the first two rows.
    workbook = wc_utils.workbook.io.read(workbook_path)
    kb_sheet = workbook['Knowledge base']
    kb_sheet.insert(1, kb_sheet.pop(0))
    wc_utils.workbook.io.write(workbook_path, workbook)

    reader = io.Reader()
    with self.assertRaisesRegex(ValueError, "The columns of worksheet 'Knowledge base' must be defined in this order"):
        loaded = reader.run(workbook_path, self.seq_path)

    loaded = reader.run(workbook_path, self.seq_path, strict=False)
    self.assertTrue(loaded.is_equal(self.kb))
    self.assertTrue(filecmp.cmp(self.seq_path, written_seq, shallow=False))
def _default(self):
    """Normalize a knowledge base by reading it and writing it back out.

    The output goes to ``dest_core``/``dest_seq`` when either one is set,
    otherwise it overwrites ``source_core``/``source_seq``. The output is
    written without data repo metadata, protected unless ``unprotected``
    is set.
    """
    options = self.app.pargs
    loaded = io.Reader().run(
        options.source_core, seq_path=options.source_seq, taxon=options.taxon)
    knowledge_base = loaded[core.KnowledgeBase][0]

    # NOTE(review): when only one destination is given, the other is passed
    # to the writer unset rather than falling back to the source path --
    # confirm that this is intended.
    if not (options.dest_core or options.dest_seq):
        out_core, out_seq = options.source_core, options.source_seq
    else:
        out_core, out_seq = options.dest_core, options.dest_seq

    io.Writer().run(
        out_core,
        knowledge_base,
        seq_path=out_seq,
        taxon=options.taxon,
        data_repo_metadata=False,
        protected=(not options.unprotected),
    )
def test_update_version_metadata(self):
    """Run ``update-version-metadata`` and check the stored ``wc_kb`` version.

    A knowledge base with ``wc_kb_version='0.0.0'`` is written, the
    command is run on the files, and the re-read knowledge base must
    report ``wc_kb.__version__``.
    """
    core_file = path.join(self.tempdir, 'core.xlsx')
    seq_file = path.join(self.tempdir, 'seq.fna')

    stale_kb = wc_kb.KnowledgeBase(id='kb', name='KB', version='0.0.1a', wc_kb_version='0.0.0')
    # Guard: the check below is only meaningful if the version starts out stale.
    self.assertNotEqual(stale_kb.wc_kb_version, wc_kb.__version__)
    io.Writer().run(stale_kb, core_file, seq_file, set_repo_metadata_from_path=False)

    command = ['update-version-metadata', core_file, seq_file, '--ignore-repo-metadata']
    with __main__.App(argv=command) as app:
        app.run()

    updated_kb = io.Reader().run(core_file, seq_file)
    self.assertEqual(updated_kb.wc_kb_version, wc_kb.__version__)
def test_create_template(self):
    """Create a template knowledge base and check that it can be read back.

    The test has no assertions; it passes as long as neither
    ``io.create_template`` nor the read raises.
    """
    template_core = os.path.join(self.dir, 'template.xlsx')
    template_seq = os.path.join(self.dir, 'template_seq.fna')

    io.create_template(template_core, template_seq, set_repo_metadata_from_path=False)

    # The result is not inspected; only a successful read is required.
    template_kb = io.Reader().run(template_core, template_seq)
def test_create_template(self):
    """Create a template knowledge base and check that it can be read back.

    The test has no assertions; it passes as long as
    ``io.create_template`` succeeds and the read yields at least one
    ``core.KnowledgeBase`` entry to index.
    """
    template_core = os.path.join(self.dir, 'template.xlsx')
    template_seq = os.path.join(self.dir, 'template_seq.fna')

    io.create_template(template_core, template_seq, data_repo_metadata=False)

    # The result is not inspected beyond taking the first knowledge base.
    objects = io.Reader().run(template_core, seq_path=template_seq)
    template_kb = objects[core.KnowledgeBase][0]