예제 #1
0
 def run_workflow(self):
     """Execute a workflow that loads a dataset and then runs a Python
     script producing a dataset whose column names contain unicode
     characters. Verifies the resulting column names.
     """
     f_handle = self.fileserver.upload_file(CSV_FILE)
     vt = self.db.create_viztrail(self.ENGINE_ID, {'name': 'My Project'})
     # LOAD DATASET
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.load_dataset(f_handle.identifier, DS_NAME)
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertFalse(wf.has_error)
     # RUN Python Script
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.python_cell(PYTHON_SCRIPT)
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     if wf.has_error:
         # Echo the script's stderr to aid debugging on failure
         print(wf.modules[-1].stderr)
     self.assertFalse(wf.has_error)
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     names = set(c.name.upper().replace('_', ' ') for c in ds.columns)
     # BUG FIX: assertTrue(len(names), 4) always passed because the second
     # positional argument of assertTrue is the failure message, not an
     # expected value. Use an equality assertion instead.
     self.assertEquals(len(names), 4)
     for name in ['DATE', 'IMO CODE', 'PORT OF DEPARTURE', 'PLACE OF RECEIPT']:
         self.assertTrue(name in names)
예제 #2
0
 def run_erroneous_workflow(self):
     """Test workflow that has errors."""
     file_handle = self.fileserver.upload_file(CSV_FILE)
     viztrail = self.db.create_viztrail(self.ENGINE_ID, {'name': 'My Project'})
     # Create the dataset from the uploaded file
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.load_dataset(file_handle.identifier, DS_NAME)
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     dataset = self.datastore.get_dataset(
         workflow.modules[-1].datasets[DS_NAME])
     age_column = dataset.column_by_name('Age')
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.update_cell(DS_NAME, age_column.identifier, 0, '28')
     )
     # This should create an error because of the invalid column name
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.rename_column(DS_NAME, age_column.identifier, '')
     )
     # This should not have any effect
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.update_cell(DS_NAME, age_column.identifier, 0, '29')
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     self.assertTrue(workflow.has_error)
     # Module identifiers are expected to be non-negative and unique
     seen_identifiers = set()
     for module in workflow.modules:
         self.assertTrue(module.identifier >= 0)
         self.assertTrue(module.identifier not in seen_identifiers)
         seen_identifiers.add(module.identifier)
예제 #3
0
 def test_type_inference_lens(self):
     """Test TYPE INFERENCE lens."""
     # Create new work trail and retrieve the HEAD workflow of the default
     # branch
     mimir.initialize()
     f_handle = self.fileserver.upload_file(INCOMPLETE_CSV_FILE)
     vt = self.db.create_viztrail(ENGINE_ID, {'name': 'My Project'})
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.load_dataset(f_handle.identifier, DS_NAME)
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     ds1 = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     self.assertFalse(wf.has_error)
     # Infer type
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_type_inference(DS_NAME, 0.6)
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertFalse(wf.has_error)
     # FIX: removed a left-over debug print of the command text that ran
     # unconditionally before the assertion.
     self.assertEquals(
         wf.modules[-1].command_text.upper(),
         'TYPE INFERENCE FOR COLUMNS IN ' + DS_NAME.upper() +
         ' WITH PERCENT_CONFORM = 0.6'
     )
     # Get dataset. The lens changes column types only; the cell values
     # must be identical to the input dataset.
     ds2 = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     self.assertEquals(len(ds2.columns), 3)
     self.assertEquals(ds2.row_count, 7)
     ds1_rows = ds1.fetch_rows()
     ds2_rows = ds2.fetch_rows()
     for i in range(ds2.row_count):
         self.assertEquals(ds1_rows[i].values, ds2_rows[i].values)
     mimir.finalize()
예제 #4
0
 def test_view_urls(self):
     """Ensure that the urls for workflow views get updated correctly when
     the workflow is modified."""
     file_handle = self.fileserver.upload_file(CSV_FILE)
     viztrail = self.db.create_viztrail(self.ENGINE_ID, {'name': 'My Project'})
     # Step 1: create the dataset
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.load_dataset(file_handle.identifier, DS_NAME)
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     # Step 2: create a plot view over the dataset
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.create_plot(
             DS_NAME,
             CHART_NAME,
             series=[{'series_column': 2}]
         )
     )
     state = self.api.get_workflow(viztrail.identifier, DEFAULT_BRANCH)['state']
     url = state['charts'][0]['links'][0]['href']
     self.assertTrue('master/workflows/1/modules/1/views' in url)
     # Step 3: update a cell; the chart url should point to the new
     # workflow version
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.update_cell(DS_NAME, 0, 0, '28')
     )
     state = self.api.get_workflow(viztrail.identifier, DEFAULT_BRANCH)['state']
     url = state['charts'][0]['links'][0]['href']
     self.assertTrue('master/workflows/2/modules/2/views' in url)
예제 #5
0
 def test_workflow_life_cycle(self):
     """Test functionality to execute a workflow module."""
     # Create new work trail.
     viztrail = self.db.create_viztrail(ENV.identifier, {'name': 'My Project'})
     # Append two modules to the default branch
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=python_cell('abc')
     )
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=load_dataset('file', 'name')
     )
     # Create a branch at the end of the default branch
     new_branch = self.db.create_branch(
         viztrail_id=viztrail.identifier,
         properties={'name': 'New Branch'}
     )
     # Append modules at the end of the new branch
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         branch_id=new_branch.identifier,
         command=python_cell('xyz')
     )
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         branch_id=new_branch.identifier,
         command=load_dataset('file', 'myname'),
         before_id=0
     )
     # Ensure that all version files exist
     self.check_files(
         viztrail.identifier,
         viztrail.branches[DEFAULT_BRANCH].workflows,
         True
     )
     new_versions = viztrail.branches[new_branch.identifier].workflows
     self.check_files(viztrail.identifier, new_versions, True)
     # Delete new branch. Ensure that only the master versions exist
     self.assertTrue(
         self.db.delete_branch(
             viztrail_id=viztrail.identifier,
             branch_id=new_branch.identifier
         )
     )
     self.check_files(
         viztrail.identifier,
         viztrail.branches[DEFAULT_BRANCH].workflows,
         True
     )
     self.check_files(viztrail.identifier, new_versions, False)
     # Deleting a non-existing branch should return False
     self.assertFalse(
         self.db.delete_branch(
             viztrail_id=viztrail.identifier,
             branch_id=new_branch.identifier
         )
     )
     self.assertFalse(
         self.db.delete_branch(
             viztrail_id=viztrail.identifier,
             branch_id='unknown'
         )
     )
     # Deleting master branch should raise exception
     with self.assertRaises(ValueError):
         self.db.delete_branch(
             viztrail_id=viztrail.identifier,
             branch_id=DEFAULT_BRANCH
         )
예제 #6
0
 def test_validate_vizual(self):
     """Test validation of VizUAL cell command specifications."""
     # DELETE COLUMN: a valid specification validates cleanly
     spec = cmd.delete_column('dataset', 'column')
     cmd.validate_command(self.command_repository, spec)
     # ... adding an unexpected argument makes it invalid
     spec.arguments['row'] = 'row'
     with self.assertRaises(ValueError):
         cmd.validate_command(self.command_repository, spec)
     # ... removing a mandatory argument makes it invalid
     spec = cmd.delete_column('dataset', 'column')
     del spec.arguments['dataset']
     with self.assertRaises(ValueError):
         cmd.validate_command(self.command_repository, spec)
     # ... an extra argument on a fresh specification is also invalid
     spec = cmd.delete_column('dataset', 'column')
     spec.arguments['row'] = 'row'
     with self.assertRaises(ValueError):
         cmd.validate_command(self.command_repository, spec)
     # Valid specifications for the remaining VizUAL commands: delete row,
     # insert column/row, load dataset, move column/row, rename column,
     # and update cell.
     for spec in [
         cmd.delete_row('dataset', 'row'),
         cmd.insert_column('dataset', 1, 'A'),
         cmd.insert_row('dataset', 1),
         cmd.load_dataset('file', 'dataset', filename='My File'),
         cmd.move_column('dataset', 'A', 2),
         cmd.move_row('dataset', 1, 2),
         cmd.rename_column('dataset', 'A', 'B'),
         cmd.update_cell('dataset', 'A', 1, 'X')
     ]:
         cmd.validate_command(self.command_repository, spec)
     # Unknown VizUAL Command
     arguments = {
         'name': 'unknown',
         'arguments': {
             'dataset': '1',
             'name': '2',
             'position': '3'
         }
     }
     with self.assertRaises(ValueError):
         cmd.validate_command(
             self.command_repository,
             ModuleSpecification(cmd.PACKAGE_VIZUAL, 'unknown', arguments)
         )
예제 #7
0
 def test_geocode_lens(self):
     """Test GEOCODE lens."""
     # Create new work trail and retrieve the HEAD workflow of the default
     # branch
     mimir.initialize()
     file_handle = self.fileserver.upload_file(GEO_FILE)
     viztrail = self.db.create_viztrail(ENGINE_ID, {'name': 'My Project'})
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.load_dataset(file_handle.identifier, DS_NAME)
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     dataset = self.datastore.get_dataset(
         workflow.modules[-1].datasets[DS_NAME])
     self.assertFalse(workflow.has_error)
     # Geocode lens with explicit address columns
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.mimir_geocode(
             DS_NAME,
             'GOOGLE',
             house_nr=dataset.column_by_name('STRNUMBER').identifier,
             street=dataset.column_by_name('STRNAME').identifier,
             city=dataset.column_by_name('CITY').identifier,
             state=dataset.column_by_name('STATE').identifier
         )
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     if workflow.has_error:
         print(workflow.modules[-1].stderr[0])
     self.assertEquals(
         workflow.modules[-1].command_text.upper(),
         'GEOCODE HOUSE_NUMBER=STRNUMBER,STREET=STRNAME,CITY=CITY,STATE=STATE PEOPLE USING GOOGLE'
     )
     self.assertFalse(workflow.has_error)
     self.assertEquals(len(workflow.modules), 2)
     # The lens appends result columns to the dataset
     dataset = self.datastore.get_dataset(
         workflow.modules[-1].datasets[DS_NAME])
     self.assertEquals(len(dataset.columns), 6)
     # Geocode lens without explicit address columns
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.mimir_geocode(DS_NAME, 'GOOGLE')
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     if workflow.has_error:
         print(workflow.modules[-1].stderr[0])
     self.assertEquals(
         workflow.modules[-1].command_text.upper(),
         'GEOCODE PEOPLE USING GOOGLE'
     )
     self.assertFalse(workflow.has_error)
     self.assertEquals(len(workflow.modules), 3)
     dataset = self.datastore.get_dataset(
         workflow.modules[-1].datasets[DS_NAME])
     self.assertEquals(len(dataset.columns), 8)
     mimir.finalize()
예제 #8
0
 def test_missing_key_lens(self):
     """Test MISSING_KEY lens."""
     # Create new work trail and retrieve the HEAD workflow of the default
     # branch
     mimir.initialize()
     file_handle = self.fileserver.upload_file(INCOMPLETE_CSV_FILE)
     viztrail = self.db.create_viztrail(ENGINE_ID, {'name': 'My Project'})
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.load_dataset(file_handle.identifier, DS_NAME)
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     self.assertFalse(workflow.has_error)
     dataset = self.datastore.get_dataset(
         workflow.modules[-1].datasets[DS_NAME])
     # Missing key lens on the Age column
     age_column_id = dataset.columns[dataset.column_index('Age')].identifier
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.mimir_missing_key(
             DS_NAME, age_column_id, missing_only=True)
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     self.assertEquals(
         workflow.modules[-1].command_text.upper(),
         'MISSING KEYS FOR AGE IN ' + DS_NAME.upper()
     )
     self.assertFalse(workflow.has_error)
     # The lens adds rows for the missing key values
     dataset = self.datastore.get_dataset(
         workflow.modules[-1].datasets[DS_NAME])
     self.assertEquals(len(dataset.columns), 3)
     rows = dataset.fetch_rows()
     self.assertEquals(len(rows), 24)
     # Missing key lens on the Salary column of the result
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.mimir_missing_key(
             DS_NAME,
             dataset.columns[dataset.column_index('Salary')].identifier,
             missing_only=True
         )
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     self.assertFalse(workflow.has_error)
     dataset = self.datastore.get_dataset(
         workflow.modules[-1].datasets[DS_NAME])
     self.assertEquals(len(dataset.columns), 3)
     rows = dataset.fetch_rows()
     self.assertEquals(len(rows), 55)
     mimir.finalize()
예제 #9
0
 def test_key_repair_lens(self):
     """Test KEY REPAIR lens."""
     # Create new work trail and retrieve the HEAD workflow of the default
     # branch
     mimir.initialize()
     file_handle = self.fileserver.upload_file(KEY_REPAIR_FILE)
     viztrail = self.db.create_viztrail(ENGINE_ID, {'name': 'My Project'})
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.load_dataset(file_handle.identifier, DS_NAME)
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     self.assertFalse(workflow.has_error)
     ds_before = self.datastore.get_dataset(
         workflow.modules[0].datasets[DS_NAME])
     # Key repair lens on the Empid column
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.mimir_key_repair(
             DS_NAME, ds_before.column_by_name('Empid').identifier)
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     self.assertFalse(workflow.has_error)
     self.assertEquals(
         workflow.modules[-1].command_text.upper(),
         'KEY REPAIR FOR EMPID IN ' + DS_NAME.upper()
     )
     # The dataset of the first module is expected to be unchanged
     ds_after = self.datastore.get_dataset(
         workflow.modules[0].datasets[DS_NAME])
     self.assertEquals(ds_before.row_count, ds_after.row_count)
     repaired = self.datastore.get_dataset(
         workflow.modules[-1].datasets[DS_NAME])
     self.assertEquals(len(repaired.columns), 4)
     self.assertEquals(repaired.row_count, 2)
     names = set()
     empids = set()
     rowids = set()
     for row in DatasetClient(dataset=repaired).rows:
         rowids.add(row.identifier)
         empids.add(int(row.get_value('empid')))
         names.add(row.get_value('name'))
     self.assertTrue(1 in empids)
     self.assertTrue(2 in rowids)
     self.assertTrue('Alice' in names)
     self.assertTrue('Carla' in names)
     # Test error case and command text
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.mimir_key_repair('MY DS', 'MY COL')
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     self.assertTrue(workflow.has_error)
     self.assertEquals(
         workflow.modules[-1].command_text.upper(),
         'KEY REPAIR FOR \'MY COL\' IN \'MY DS\''
     )
     mimir.finalize()
예제 #10
0
    def run_update_datasets(self):
        """Test dropping and renaming of datasets."""
        f_handle = self.fileserver.upload_file(CSV_FILE)
        vt = self.db.create_viztrail(self.ENGINE_ID, {'name': 'My Project'})
        # Load the dataset; the workflow head module should expose it
        # under DS_NAME
        self.db.append_workflow_module(viztrail_id=vt.identifier,
                                       command=cmd.load_dataset(
                                           f_handle.identifier, DS_NAME))
        wf = self.db.get_workflow(viztrail_id=vt.identifier)
        self.assertFalse(wf.has_error)
        self.assertTrue(DS_NAME in wf.modules[-1].datasets)
        # Rename the dataset. Earlier modules keep the old name; only the
        # head module sees the new one.
        new_name = DS_NAME + '_renamed'
        self.db.append_workflow_module(viztrail_id=vt.identifier,
                                       command=cmd.rename_dataset(
                                           DS_NAME, new_name))
        wf = self.db.get_workflow(viztrail_id=vt.identifier)
        self.assertFalse(wf.has_error)
        self.assertTrue(DS_NAME in wf.modules[0].datasets)
        self.assertFalse(new_name in wf.modules[0].datasets)
        self.assertFalse(DS_NAME in wf.modules[-1].datasets)
        self.assertTrue(new_name in wf.modules[-1].datasets)
        # Drop the renamed dataset; the head module no longer exposes it
        self.db.append_workflow_module(viztrail_id=vt.identifier,
                                       command=cmd.drop_dataset(new_name))
        wf = self.db.get_workflow(viztrail_id=vt.identifier)
        self.assertFalse(wf.has_error)
        self.assertFalse(new_name in wf.modules[-1].datasets)
        # Dropping the same dataset a second time is expected to fail
        self.db.append_workflow_module(viztrail_id=vt.identifier,
                                       command=cmd.drop_dataset(new_name))
        wf = self.db.get_workflow(viztrail_id=vt.identifier)
        self.assertTrue(wf.has_error)
        # Delete the Drop Dataset that failed and replace the first drop with
        # a Python module that prints names
        self.db.delete_workflow_module(viztrail_id=vt.identifier,
                                       module_id=wf.modules[-1].identifier)
        wf = self.db.get_workflow(viztrail_id=vt.identifier)
        self.assertFalse(wf.has_error)
        self.db.replace_workflow_module(viztrail_id=vt.identifier,
                                        module_id=wf.modules[-1].identifier,
                                        command=cmd.python_cell("""
for row in vizierdb.get_dataset('""" + new_name + """').rows:
    print row.get_value('Name')
"""))
        wf = self.db.get_workflow(viztrail_id=vt.identifier)
        self.assertFalse(wf.has_error)
        # The replacement Python cell's output lists the two names
        self.assertEquals(wf.modules[-1].stdout[0]['data'], 'Alice\nBob')
        self.assertFalse(DS_NAME in wf.modules[-1].datasets)
        self.assertTrue(new_name in wf.modules[-1].datasets)
예제 #11
0
 def test_eval_command(self):
     """Test functionality to execute a workflow module."""
     # Create new work trail, append a module and retrieve the resulting
     # workflow from default branch HEAD.
     viztrail = self.db.create_viztrail(ENV.identifier, {'name': 'My Project'})
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=python_cell('abc')
     )
     workflow = viztrail.get_workflow()
     self.assertEquals(workflow.version, 0)
     self.assertEquals(len(workflow.modules), 1)
     # Appending a second module produces a new workflow version
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=python_cell('def')
     )
     workflow = viztrail.get_workflow(branch_id=DEFAULT_BRANCH)
     self.assertEquals(workflow.version, 1)
     self.assertEquals(len(workflow.modules), 2)
     first, second = workflow.modules
     self.assertEquals(len(first.stdout), 1)
     self.assertEquals(first.command.module_type, PACKAGE_PYTHON)
     self.assertEquals(first.command.command_identifier, PYTHON_CODE)
     self.assertEquals(first.command.arguments[PYTHON_SOURCE], 'abc')
     self.assertEquals(len(second.stdout), 1)
     self.assertEquals(second.command.module_type, PACKAGE_PYTHON)
     self.assertEquals(second.command.command_identifier, PYTHON_CODE)
     self.assertEquals(second.command.arguments[PYTHON_SOURCE], 'def')
     # Replace the first module with a load-dataset command
     self.db.replace_workflow_module(
         viztrail_id=viztrail.identifier,
         module_id=0,
         command=load_dataset('file', 'ds')
     )
     workflow = viztrail.get_workflow()
     self.assertEquals(workflow.version, 2)
     self.assertEquals(len(workflow.modules), 2)
     first, second = workflow.modules
     self.assertEquals(len(first.stdout), 1)
     self.assertEquals(first.command.module_type, PACKAGE_VIZUAL)
     self.assertEquals(first.command.command_identifier, VIZUAL_LOAD)
     self.assertEquals(first.command.arguments[PARA_FILE]['fileid'], 'file')
     self.assertEquals(first.command.arguments[PARA_NAME], 'ds')
     self.assertEquals(len(second.stdout), 2)
     self.assertEquals(second.command.module_type, PACKAGE_PYTHON)
     self.assertEquals(second.command.command_identifier, PYTHON_CODE)
     self.assertEquals(second.command.arguments[PYTHON_SOURCE], 'def')
예제 #12
0
 def run_delete_modules(self):
     """Test deletion of modules."""
     f_handle = self.fileserver.upload_file(CSV_FILE)
     vt = self.db.create_viztrail(self.ENGINE_ID, {'name': 'My Project'})
     # CREATE DATASET
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.load_dataset(f_handle.identifier, DS_NAME)
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     col_age = ds.column_by_name('Age')
     # Two cell updates on the Age column
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.update_cell(DS_NAME, col_age.identifier, 0, '28')
     )
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.update_cell(DS_NAME, col_age.identifier, 1, '42')
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertFalse(wf.has_error)
     # CONSISTENCY FIX: look up the dataset via DS_NAME instead of the
     # hard-coded 'people' literal used before, so the method follows the
     # module-level dataset name like every other statement in it.
     ds = DatasetClient(
         self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]))
     self.assertEquals(int(ds.rows[0].get_value('Age')), 28)
     self.assertEquals(int(ds.rows[1].get_value('Age')), 42)
     # DELETE UPDATE CELL: removing the second update reverts row 1
     self.db.delete_workflow_module(viztrail_id=vt.identifier,
                                    module_id=wf.modules[1].identifier)
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertFalse(wf.has_error)
     ds = DatasetClient(
         self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]))
     self.assertEquals(int(ds.rows[0].get_value('Age')), 23)
     # DELETE LOAD (will introduce error)
     self.db.delete_workflow_module(viztrail_id=vt.identifier,
                                    module_id=wf.modules[0].identifier)
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertTrue(wf.has_error)
     # DELETE last remaining module; an empty workflow has no error
     self.db.delete_workflow_module(viztrail_id=vt.identifier,
                                    module_id=wf.modules[0].identifier)
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertFalse(wf.has_error)
예제 #13
0
 def test_domain_lens(self):
     """Test DOMAIN lens."""
     # Create new work trail and retrieve the HEAD workflow of the default
     # branch
     mimir.initialize()
     f_handle = self.fileserver.upload_file(INCOMPLETE_CSV_FILE)
     vt = self.db.create_viztrail(ENGINE_ID, {'name': 'My Project'})
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.load_dataset(f_handle.identifier, DS_NAME)
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     col_age = ds.column_by_name('Age')
     self.assertFalse(wf.has_error)
     # Domain lens on the Age column
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_domain(DS_NAME, col_age.identifier)
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     if wf.has_error:
         print(wf.modules[-1].stderr[0])
     # CONSISTENCY FIX: derive the expected command text from DS_NAME
     # instead of hard-coding 'PEOPLE', matching the sibling lens tests.
     self.assertEquals(
         wf.modules[-1].command_text.upper(),
         'DOMAIN FOR AGE IN ' + DS_NAME.upper()
     )
     self.assertFalse(wf.has_error)
     self.assertEquals(len(wf.modules), 2)
     # Get dataset. The missing Age value is expected to be filled in.
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     rows = ds.fetch_rows()
     self.assertNotEquals(rows[2].values[ds.column_index('Age')], '')
     # Introduce an error. Make sure command formatting is correct
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_domain('MY DS', 'MY COL')
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertTrue(wf.has_error)
     self.assertEquals(
         wf.modules[-1].command_text.upper(),
         'DOMAIN FOR \'MY COL\' IN \'MY DS\''
     )
     mimir.finalize()
예제 #14
0
 def test_spreadsheet(self):
     """Ensure that the includeDataset option is working for spreadsheet
     updates."""
     # Upload file
     fh = self.fileserver.upload_file(CSV_FILE)
     # Create project
     ph = self.api.create_project(self.ENV.identifier,
                                  {'name': 'My Project'})
     pid = ph['id']
     # Load dataset
     DS_NAME = 'myDS'
     # FIX: the local used to be named 'cmd', shadowing the command module
     # name used throughout this file; renamed to 'command'.
     command = load_dataset(fh.identifier, DS_NAME)
     result = self.api.append_module(pid, DEFAULT_BRANCH, -1, command)
     self.validate_keys(result, ['workflow', 'modules', 'datasets'])
     # Update cell and request to include dataset
     command = update_cell(DS_NAME, 0, 0, 'A')
     result = self.api.append_module(pid,
                                     DEFAULT_BRANCH,
                                     -1,
                                     command,
                                     includeDataset={
                                         'name': DS_NAME,
                                         'offset': 0
                                     })
     self.validate_keys(result, ['workflow', 'dataset'])
     self.validate_dataset_handle(result['dataset'])
     # In case of an error the result contains the modules
     command = update_cell(DS_NAME, 100, 0, 'A')
     result = self.api.append_module(pid,
                                     DEFAULT_BRANCH,
                                     -1,
                                     command,
                                     includeDataset={
                                         'name': DS_NAME,
                                         'offset': 0
                                     })
     self.validate_keys(result, ['workflow', 'modules', 'datasets'])
예제 #15
0
 def test_annotations(self):
     """Test retrieval of cell annotations after applying a MISSING VALUE
     lens."""
     # Create new work trail and create dataset from CSV file
     mimir.initialize()
     file_handle = self.fileserver.upload_file(CSV_FILE)
     viztrail = self.db.create_viztrail(ENGINE_ID, {'name': 'My Project'})
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.load_dataset(file_handle.identifier, DS_NAME)
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     dataset = self.datastore.get_dataset(
         workflow.modules[-1].datasets[DS_NAME])
     # Apply a missing value lens to the AGE column
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.mimir_missing_value(
             DS_NAME,
             dataset.column_by_name('AGE').identifier
         )
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     dataset = self.datastore.get_dataset(
         workflow.modules[-1].datasets[DS_NAME])
     # The repaired cell is expected to carry uncertainty annotations
     annotations = dataset.get_annotations(column_id=1, row_id=2)
     self.assertEquals(len(annotations), 2)
     for annotation in annotations:
         self.assertEquals(annotation.key, ANNO_UNCERTAIN)
     mimir.finalize()
예제 #16
0
datastore = MimirDataStore(DATASTORE_DIR)
fileserver = DefaultFileServer(FILESERVER_DIR)
vizual = MimirVizualEngine(datastore, fileserver)
db = FileSystemViztrailRepository(VIZTRAILS_DIR, {ENV.identifier: ENV})

mimir.initialize()

vt = db.create_viztrail(ENV.identifier, {'name': 'My Project'})

#
# LOAD DATASET
#
f_handle = fileserver.upload_file(CSV_FILE)
db.append_workflow_module(viztrail_id=vt.identifier,
                          command=cmd.load_dataset(f_handle.identifier,
                                                   DS_NAME))
wf = db.get_workflow(viztrail_id=vt.identifier)
ds = datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
print_dataset(ds)
"""
#
# PICKER LENS
#
db.append_workflow_module(
    viztrail_id=vt.identifier,
    command=cmd.mimir_picker(
        DS_NAME,
        [
            {'pickFrom': 'A'},
            {'pickFrom': 'B'}
        ],
예제 #17
0
 def test_missing_value_lens(self):
     """Test MISSING_VALUE lens."""
     # Create new work trail and retrieve the HEAD workflow of the default
     # branch
     mimir.initialize()
     file_handle = self.fileserver.upload_file(INCOMPLETE_CSV_FILE)
     viztrail = self.db.create_viztrail(ENGINE_ID, {'name': 'My Project'})
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.load_dataset(file_handle.identifier, DS_NAME)
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     dataset = self.datastore.get_dataset(
         workflow.modules[-1].datasets[DS_NAME])
     self.assertFalse(workflow.has_error)
     # Missing value lens on the AGE column
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.mimir_missing_value(
             DS_NAME, dataset.column_by_name('AGE').identifier)
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     self.assertFalse(workflow.has_error)
     self.assertEquals(
         workflow.modules[-1].command_text.upper(),
         'MISSING VALUES FOR AGE IN ' + DS_NAME.upper()
     )
     self.assertEquals(len(workflow.modules), 2)
     # The previously empty Age cell is expected to be filled in
     dataset = self.datastore.get_dataset(
         workflow.modules[-1].datasets[DS_NAME])
     rows = dataset.fetch_rows()
     self.assertNotEquals(rows[2].values[dataset.column_index('Age')], '')
     # Annotations on the repaired cell
     annotations = dataset.get_annotations(column_id=1, row_id=4)
     self.assertEquals(len(annotations), 2)
     # MISSING VALUE lens with value constraint
     viztrail = self.db.create_viztrail(ENGINE_ID, {'name' : 'New Project'})
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.load_dataset(file_handle.identifier, DS_NAME)
     )
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.mimir_missing_value(
             DS_NAME,
             dataset.column_by_name('AGE').identifier,
             constraint='> 30'
         )
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     if workflow.has_error:
         print(workflow.modules[-1].stderr[0])
     self.assertFalse(workflow.has_error)
     self.assertEquals(
         workflow.modules[-1].command_text.upper(),
         'MISSING VALUES FOR AGE IN ' + DS_NAME.upper() +
         ' WITH CONSTRAINT > 30'
     )
     # Repaired values must satisfy the constraint
     dataset = self.datastore.get_dataset(
         workflow.modules[-1].datasets[DS_NAME])
     rows = dataset.fetch_rows()
     self.assertTrue(rows[2].values[dataset.column_index('Age')] > 30)
     # Command text in case of error
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.mimir_missing_value('MY DS', '?', constraint='A B')
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     self.assertTrue(workflow.has_error)
     self.assertEquals(
         workflow.modules[-1].command_text.upper(),
         'MISSING VALUES FOR ? IN \'MY DS\'' + ' WITH CONSTRAINT A B'
     )
     mimir.finalize()
예제 #18
0
 def test_picker_lens(self):
     """Test PICKER lens.

     Applies the picker lens three times: with the default result column
     name, with a custom name, and picking from an earlier pick result.
     Verifies the generated command text and the resulting dataset schema.
     """
     # Create new work trail and retrieve the HEAD workflow of the default
     # branch
     mimir.initialize()
     f_handle = self.fileserver.upload_file(PICKER_FILE)
     vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'})
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.load_dataset(f_handle.identifier, DS_NAME)
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertFalse(wf.has_error)
     # Picker lens over Age and Salary with the default result column name
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_picker(DS_NAME, [
             {'pickFrom': ds.column_by_name('Age').identifier},
             {'pickFrom': ds.column_by_name('Salary').identifier}
         ])
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     if wf.modules[-1].has_error:
         # print(...) is valid in both Python 2 and 3, unlike the statement form
         print(wf.modules[-1].stderr)
     self.assertFalse(wf.has_error)
     self.assertEquals(wf.modules[-1].command_text.upper(), 'PICK FROM AGE,SALARY IN ' + DS_NAME.upper())
     # Get dataset: one new column with the generated name was added
     self.assertEquals(len(wf.modules[-1].datasets), 1)
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     columns = [c.name for c in ds.columns]
     self.assertEquals(len(ds.columns), 5)
     self.assertTrue('PICK_ONE_AGE_SALARY' in columns)
     # Pick another column, this time with custom name
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_picker(DS_NAME, [
             {'pickFrom': ds.column_by_name('Age').identifier},
             {'pickFrom': ds.column_by_name('Salary').identifier}
         ],
         pick_as='My Column')
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertFalse(wf.has_error)
     # Get dataset: both the generated and the custom column are present
     self.assertEquals(len(wf.modules[-1].datasets), 1)
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     columns = [c.name for c in ds.columns]
     self.assertEquals(len(ds.columns), 6)
     self.assertTrue('PICK_ONE_AGE_SALARY' in columns)
     self.assertTrue('My Column' in columns)
     # Pick from a picked column
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_picker(DS_NAME, [
             {'pickFrom': ds.column_by_name('Age').identifier},
             {'pickFrom': ds.column_by_name('PICK_ONE_AGE_SALARY').identifier}
         ],
         pick_as='My Column')
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     if wf.modules[-1].has_error:
         print(wf.modules[-1].stderr)
     self.assertFalse(wf.has_error)
     self.assertEquals(wf.modules[-1].command_text.upper(), 'PICK FROM AGE,PICK_ONE_AGE_SALARY AS \'MY COLUMN\' IN ' + DS_NAME.upper())
     # Materializing the final dataset must succeed (result value unused)
     self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     mimir.finalize()
예제 #19
0
 def test_schema_matching_lens(self):
     """Test SCHEMA_MATCHING lens."""
     # Boot a Mimir session and build a viztrail whose default branch
     # loads the people CSV file as its first module.
     mimir.initialize()
     uploaded = self.fileserver.upload_file(CSV_FILE)
     viztrail = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'})
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.load_dataset(uploaded.identifier, DS_NAME)
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     self.assertFalse(workflow.has_error)
     # Apply the schema-matching lens; the result goes to a new dataset.
     target = 'new_' + DS_NAME
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.mimir_schema_matching(
             DS_NAME,
             [
                 {'column': 'BDate', 'type': 'int'},
                 {'column': 'PName', 'type': 'varchar'}
             ],
             target
         )
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     self.assertFalse(workflow.has_error)
     self.assertEquals(
         workflow.modules[-1].command_text.upper(),
         'SCHEMA MATCHING PEOPLE (BDATE INT, PNAME VARCHAR) AS NEW_' + DS_NAME.upper()
     )
     # The module now exposes both the source and the matched dataset.
     self.assertEquals(len(workflow.modules[-1].datasets), 2)
     matched = self.datastore.get_dataset(workflow.modules[-1].datasets[target])
     self.assertEquals(len(matched.columns), 2)
     self.assertEquals(matched.row_count, 2)
     # Re-using an existing dataset name must put the workflow in error.
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.mimir_schema_matching(
             DS_NAME,
             [{'column': 'BDate', 'type': 'int'}],
             target
         )
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     self.assertTrue(workflow.has_error)
     # Replacing the failing module with a fresh target name recovers.
     self.db.replace_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.mimir_schema_matching(
             DS_NAME,
             [{'column': 'BDate', 'type': 'int'}],
             'a_new_' + DS_NAME
         ),
         module_id=workflow.modules[-1].identifier,
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     self.assertFalse(workflow.has_error)
     self.assertEquals(
         workflow.modules[-1].command_text.upper(),
         'SCHEMA MATCHING PEOPLE (BDATE INT) AS A_NEW_' + DS_NAME.upper()
     )
     # A dataset name containing a space is invalid and must fail as well.
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.mimir_schema_matching(
             DS_NAME,
             [{'column': 'BDate', 'type': 'int'}],
             'SOME NAME'
         )
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     self.assertTrue(workflow.has_error)
     self.assertEquals(
         workflow.modules[-1].command_text.upper(),
         'SCHEMA MATCHING PEOPLE (BDATE INT) AS \'SOME NAME\''
     )
     mimir.finalize()
예제 #20
0
 def run_mixed_workflow(self):
     """Test functionality to execute a workflow module."""
     # Upload the CSV file and start a fresh viztrail.
     csv_handle = self.fileserver.upload_file(CSV_FILE)
     viztrail = self.db.create_viztrail(self.ENGINE_ID, {'name': 'My Project'})
     # (1) Load the dataset.
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.load_dataset(csv_handle.identifier, DS_NAME)
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     self.assertFalse(workflow.has_error)
     self.assertEquals(
         workflow.modules[-1].command_text,
         'LOAD DATASET people FROM FILE dataset.csv'
     )
     # (2) Insert a row at position 1.
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.insert_row(DS_NAME, 1)
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     self.assertFalse(workflow.has_error)
     self.assertEquals(
         workflow.modules[-1].command_text,
         'INSERT ROW INTO people AT POSITION 1'
     )
     # (3) Run the Python cell that sets names/variables.
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.python_cell(SET_VARIABLES_PY)
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     self.assertFalse(workflow.has_error)
     self.assertEquals(workflow.modules[-1].command_text, SET_VARIABLES_PY)
     dataset = self.datastore.get_dataset(workflow.modules[-1].datasets[DS_NAME])
     # (4) Set the age in row 1 to 28 via a VizUAL cell update.
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.update_cell(
             DS_NAME,
             dataset.column_by_name('Age').identifier,
             1,
             '28'
         )
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     self.assertFalse(workflow.has_error)
     self.assertEquals(
         workflow.modules[-1].command_text.upper(),
         'UPDATE PEOPLE SET [AGE,1] = 28'
     )
     dataset = self.datastore.get_dataset(workflow.modules[-1].datasets[DS_NAME])
     # (5) Change the name in row 0 from Alice to Bob.
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.update_cell(
             DS_NAME,
             dataset.column_by_name('Name').identifier,
             0,
             'Bob'
         )
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     self.assertFalse(workflow.has_error)
     self.assertEquals(
         workflow.modules[-1].command_text.upper(),
         'UPDATE PEOPLE SET [NAME,0] = \'BOB\''
     )
     # (6) Update the dataset through a filtering Python script.
     self.db.append_workflow_module(
         viztrail_id=viztrail.identifier,
         command=cmd.python_cell(UPDATE_DATASET_WITH_FILTER_PY)
     )
     workflow = self.db.get_workflow(viztrail_id=viztrail.identifier)
     self.assertEquals(
         workflow.modules[-1].command_text,
         UPDATE_DATASET_WITH_FILTER_PY
     )
     self.assertFalse(workflow.has_error)
     # Every name is now Bobby and the ages match the expected sequence.
     dataset = DatasetClient(
         self.datastore.get_dataset(workflow.modules[-1].datasets[DS_NAME])
     )
     expected_ages = [23, 28, 32]
     for i, row in enumerate(dataset.rows):
         self.assertEquals(row.get_value('Name'), 'Bobby')
         self.assertEquals(int(row.get_value('Age')), expected_ages[i])
예제 #21
0
 def test_branching(self):
     """Test creating branches of a viztrail and modifying their workflows.

     Covers: branching from the HEAD and from an interior module, branch
     persistence across repository reloads, isolation of appends/replaces
     between branches, error cases, and branch provenance records.
     """
     # Create new viztrail and ensure that it contains exactly one branch
     vt = self.db.create_viztrail(ENV.identifier, {'name': 'My Project'})
     self.assertEquals(len(vt.branches), 1)
     self.assertTrue(DEFAULT_BRANCH in vt.branches)
     self.assertEquals(vt.branches[DEFAULT_BRANCH].identifier,
                       DEFAULT_BRANCH)
     # Append two modules to the default branch
     self.db.append_workflow_module(viztrail_id=vt.identifier,
                                    command=python_cell('abc'))
     self.db.append_workflow_module(viztrail_id=vt.identifier,
                                    command=load_dataset('file', 'name'))
     # Create a branch at the end of the default branch. The new branch
     # contains one workflow with two modules; the version number is 2
     newbranch = self.db.create_branch(viztrail_id=vt.identifier,
                                       properties={'name': 'New Branch'})
     self.assertEquals(len(newbranch.workflows), 1)
     self.assertEquals(newbranch.workflows[-1].version, 2)
     wf = vt.get_workflow(branch_id=newbranch.identifier)
     self.assertEquals(wf.version, 2)
     self.assertEquals(len(wf.modules), 2)
     self.assertTrue(newbranch.identifier in vt.branches)
     # Ensure that everything has been persisted properly.
     # Re-instantiating the repository forces a reload from disk.
     self.db = FileSystemViztrailRepository(VIZTRAILS_DIRECTORY,
                                            {ENV.identifier: ENV})
     vt = self.db.get_viztrail(vt.identifier)
     newbranch = vt.branches[newbranch.identifier]
     self.assertEquals(len(newbranch.workflows), 1)
     self.assertEquals(newbranch.workflows[-1].version, 2)
     wf = vt.get_workflow(branch_id=newbranch.identifier)
     self.assertEquals(wf.version, 2)
     self.assertEquals(len(wf.modules), 2)
     self.assertTrue(newbranch.identifier in vt.branches)
     self.assertEquals(newbranch.properties.get_properties()['name'],
                       'New Branch')
     # Create a third branch from the start of the master branch
     # (module_id=0 branches after the first module only)
     thirdbranch = self.db.create_branch(viztrail_id=vt.identifier,
                                         properties={'name': 'Next Branch'},
                                         module_id=0)
     wf = vt.get_workflow(branch_id=thirdbranch.identifier)
     self.assertEquals(wf.version, 3)
     self.assertEquals(len(wf.modules), 1)
     # Append modules at end of master and at beginning of thirdbranch
     self.db.append_workflow_module(viztrail_id=vt.identifier,
                                    command=python_cell('abc'))
     self.db.append_workflow_module(viztrail_id=vt.identifier,
                                    branch_id=thirdbranch.identifier,
                                    command=python_cell('def'),
                                    before_id=0)
     master_head = vt.get_workflow()
     self.assertEquals(len(master_head.modules), 3)
     self.assertEquals(master_head.modules[0].command.module_type,
                       PACKAGE_PYTHON)
     self.assertEquals(master_head.modules[1].command.module_type,
                       PACKAGE_VIZUAL)
     self.assertEquals(master_head.modules[2].command.module_type,
                       PACKAGE_PYTHON)
     # The second branch is unaffected by the appends above
     b2_head = vt.get_workflow(branch_id=newbranch.identifier)
     self.assertEquals(len(b2_head.modules), 2)
     self.assertEquals(b2_head.modules[0].command.module_type,
                       PACKAGE_PYTHON)
     self.assertEquals(b2_head.modules[1].command.module_type,
                       PACKAGE_VIZUAL)
     b3_head = vt.get_workflow(branch_id=thirdbranch.identifier)
     self.assertEquals(len(b3_head.modules), 2)
     self.assertEquals(b3_head.modules[0].command.module_type,
                       PACKAGE_PYTHON)
     self.assertEquals(b3_head.modules[1].command.module_type,
                       PACKAGE_PYTHON)
     # Replace second module of third branch
     self.db.replace_workflow_module(
         viztrail_id=vt.identifier,
         branch_id=thirdbranch.identifier,
         module_id=b3_head.modules[1].identifier,
         command=load_dataset('file', 'name'))
     b3_head = vt.get_workflow(branch_id=thirdbranch.identifier)
     self.assertEquals(len(b3_head.modules), 2)
     self.assertEquals(b3_head.modules[0].command.module_type,
                       PACKAGE_PYTHON)
     self.assertEquals(b3_head.modules[1].command.module_type,
                       PACKAGE_VIZUAL)
     # The replace must not have touched the other branches
     master_head = vt.get_workflow()
     self.assertEquals(len(master_head.modules), 3)
     self.assertEquals(master_head.modules[0].command.module_type,
                       PACKAGE_PYTHON)
     self.assertEquals(master_head.modules[1].command.module_type,
                       PACKAGE_VIZUAL)
     self.assertEquals(master_head.modules[2].command.module_type,
                       PACKAGE_PYTHON)
     b2_head = vt.get_workflow(branch_id=newbranch.identifier)
     self.assertEquals(len(b2_head.modules), 2)
     self.assertEquals(b2_head.modules[0].command.module_type,
                       PACKAGE_PYTHON)
     self.assertEquals(b2_head.modules[1].command.module_type,
                       PACKAGE_VIZUAL)
     # Ensure there are exceptions raised when branching of an unknown branch
     # or module
     with self.assertRaises(ValueError):
         self.db.create_branch(viztrail_id=vt.identifier,
                               source_branch='unknonw-branch',
                               properties={'name': 'New Branch'})
     with self.assertRaises(ValueError):
         self.db.create_branch(viztrail_id=vt.identifier,
                               properties={'name': 'New Branch'},
                               module_id=100)
     with self.assertRaises(ValueError):
         self.db.create_branch(viztrail_id=vt.identifier)
     # Test branch provenance: newbranch split from the default branch
     # HEAD (version 1, last module), thirdbranch from its first module.
     self.assertEquals(newbranch.provenance.source_branch, DEFAULT_BRANCH)
     self.assertEquals(newbranch.provenance.workflow_version, 1)
     self.assertEquals(newbranch.provenance.module_id, 1)
     self.assertEquals(thirdbranch.provenance.source_branch, DEFAULT_BRANCH)
     self.assertEquals(thirdbranch.provenance.workflow_version, 1)
     self.assertEquals(thirdbranch.provenance.module_id, 0)
예제 #22
0
 def test_append_module(self):
     """Test appending modules.

     Verifies that each append creates a new workflow version, that
     versions and module identifiers survive a repository reload, and
     that appending to an older version creates a new branch HEAD.
     """
     # Create new viztrail.
     vt = self.db.create_viztrail(ENV.identifier, {'name': 'My Project'})
     self.db.append_workflow_module(viztrail_id=vt.identifier,
                                    command=python_cell('abc'))
     self.db.append_workflow_module(viztrail_id=vt.identifier,
                                    command=load_dataset('file', 'name'))
     # The default branch should have two versions. The first version
     # contains one module and the second version contains two modules
     self.assertEquals(len(vt.branches[DEFAULT_BRANCH].workflows), 2)
     v1 = self.db.get_workflow(
         viztrail_id=vt.identifier,
         workflow_version=vt.branches[DEFAULT_BRANCH].workflows[0].version)
     v2 = self.db.get_workflow(
         viztrail_id=vt.identifier,
         workflow_version=vt.branches[DEFAULT_BRANCH].workflows[1].version)
     head = self.db.get_workflow(viztrail_id=vt.identifier,
                                 branch_id=DEFAULT_BRANCH)
     self.assertEquals(len(v1.modules), 1)
     self.assertEquals(len(v2.modules), 2)
     self.assertEquals(len(head.modules), 2)
     # Ensure that all modules have non-negative identifier
     for m in head.modules:
         self.assertTrue(m.identifier >= 0)
     self.assertEquals(head.modules[0].command.module_type, PACKAGE_PYTHON)
     self.assertEquals(head.modules[1].command.module_type, PACKAGE_VIZUAL)
     self.assertEquals(head.version, 1)
     # Re-load the viztrails to ensure that all information has been persisted properly
     self.db = FileSystemViztrailRepository(VIZTRAILS_DIRECTORY,
                                            {ENV.identifier: ENV})
     vt = self.db.get_viztrail(vt.identifier)
     self.assertEquals(len(vt.branches[DEFAULT_BRANCH].workflows), 2)
     v1 = self.db.get_workflow(
         viztrail_id=vt.identifier,
         workflow_version=vt.branches[DEFAULT_BRANCH].workflows[0].version)
     v2 = self.db.get_workflow(
         viztrail_id=vt.identifier,
         workflow_version=vt.branches[DEFAULT_BRANCH].workflows[1].version)
     head = self.db.get_workflow(viztrail_id=vt.identifier,
                                 branch_id=DEFAULT_BRANCH)
     self.assertEquals(len(v1.modules), 1)
     self.assertEquals(len(v2.modules), 2)
     self.assertEquals(len(head.modules), 2)
     # Ensure that all modules have non-negative identifier
     for m in head.modules:
         self.assertTrue(m.identifier >= 0)
     self.assertEquals(head.modules[0].command.module_type, PACKAGE_PYTHON)
     self.assertEquals(head.modules[1].command.module_type, PACKAGE_VIZUAL)
     self.assertEquals(head.version, 1)
     # Append a third module to the head of the default branch
     self.db.append_workflow_module(viztrail_id=vt.identifier,
                                    command=python_cell('def'))
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertEquals(len(wf.modules), 3)
     for m in wf.modules:
         self.assertTrue(m.identifier >= 0)
         # Each executed module records a success message that embeds its
         # own identifier in stdout
         self.assertEquals(m.stdout[0]['data'],
                           'SUCCESS ' + str(m.identifier))
     self.assertEquals(wf.modules[0].command.module_type, PACKAGE_PYTHON)
     self.assertEquals(wf.modules[1].command.module_type, PACKAGE_VIZUAL)
     self.assertEquals(wf.modules[2].command.module_type, PACKAGE_PYTHON)
     self.assertEquals(wf.version, 2)
     # Append a module to the first version in the branch. The resulting new
     # branch HEAD is expected to contain only two modules then.
     self.db.append_workflow_module(viztrail_id=vt.identifier,
                                    workflow_version=0,
                                    command=python_cell('def'))
     self.db = FileSystemViztrailRepository(VIZTRAILS_DIRECTORY,
                                            {ENV.identifier: ENV})
     vt = self.db.get_viztrail(vt.identifier)
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertEquals(len(wf.modules), 2)
     for m in wf.modules:
         self.assertTrue(m.identifier >= 0)
         self.assertEquals(m.stdout[0]['data'],
                           'SUCCESS ' + str(m.identifier))
     self.assertEquals(wf.modules[0].command.module_type, PACKAGE_PYTHON)
     self.assertEquals(wf.modules[1].command.module_type, PACKAGE_PYTHON)
     self.assertEquals(wf.version, 3)