Пример #1
0
 def test_load(self):
     """Load every file in LOAD_DIR into the datastore and verify that the
     re-read dataset agrees with the handle returned by load_dataset.
     """
     # Ignore files that raised errors (or are taking too much time to load)
     ignore_files = ['JSONOUTPUTWIDE.csv']
     data_types = set()
     mimir.initialize()
     for filename in os.listdir(LOAD_DIR):
         if filename in ignore_files:
             continue
         print('LOAD ' + filename)
         filename = os.path.join(LOAD_DIR, filename)
         f_handle = self.fileserver.upload_file(filename)
         ds = self.datastore.load_dataset(f_handle)
         ds_load = self.datastore.get_dataset(ds.identifier)
         for col in ds_load.columns:
             data_types.add(col.data_type)
             print('\t' + col.name_in_rdb + ' AS ' + col.name + '(' + col.data_type + ')')
         print('\t' + str(ds.row_count) + ' row(s)')
         # The persisted dataset has to match the handle that was returned
         # when the dataset was loaded.
         self.assertEqual(len(ds.columns), len(ds_load.columns))
         self.assertEqual(ds.column_counter, ds_load.column_counter)
         self.assertEqual(ds.row_counter, ds_load.row_counter)
         rows = ds.fetch_rows()
         self.assertEqual(ds.row_counter, len(rows))
         self.assertEqual(ds.row_count, len(rows))
         # Row identifiers are expected to be consecutive, starting at zero
         for i, row in enumerate(rows):
             self.assertEqual(row.identifier, i)
             self.assertEqual(len(row.values), len(ds.columns))
     mimir.finalize()
     print(data_types)
Пример #2
0
 def test_load(self):
     """Run workflow with default configuration.

     Exercises update_cell over every column type, checking both accepted
     values and values that force a result type/value conversion.
     """
     mimir.initialize()
     # Each case: (file, column, row, declared type, new value, expected
     # overrides passed through as keyword arguments).
     cases = [
         (CSV_FILE, 2, 0, 'int', 10, {}),
         (CSV_FILE, 2, 0, 'int', 10.3, {'result_type': 'real'}),
         (CSV_FILE, 2, 0, 'int', None, {}),
         (CSV_FILE, 3, 0, 'real', 10.3, {}),
         (CSV_FILE, 3, 0, 'real', 10, {'result_value': 10.0}),
         (CSV_FILE, 3, 0, 'real', 'A', {'result_type': 'varchar'}),
         (CSV_FILE, 3, 0, 'real', None, {}),
         (CSV_FILE, 4, 0, 'varchar', 'A', {}),
         (CSV_FILE, 4, 0, 'varchar', 10, {'result_value': '10'}),
         (CSV_FILE, 4, 0, 'varchar', 10.87, {'result_value': '10.87'}),
         (CSV_FILE, 4, 0, 'varchar', None, {}),
         (CSV_FILE, 8, 0, 'bool', 'False', {'result_value': False}),
         (CSV_FILE, 8, 0, 'bool', '0', {'result_value': False}),
         (CSV_FILE, 8, 0, 'bool', None, {}),
         (CSV_FILE, 8, 1, 'bool', True, {'result_value': True}),
         (CSV_FILE, 8, 1, 'bool', '1', {'result_value': True}),
         (CSV_FILE, 8, 1, 'bool', 'A', {'result_value': 'A', 'result_type': 'varchar'}),
         (CSV_FILE, 8, 1, 'bool', 10.87, {'result_value': '10.87', 'result_type': 'varchar'}),
         (CSV_FILE_DT, 1, 0, 'date', '2018-05-09', {}),
         (CSV_FILE_DT, 1, 0, 'date', '20180509', {'result_value': '20180509', 'result_type': 'varchar'}),
         (CSV_FILE_DT, 1, 0, 'date', None, {}),
         (CSV_FILE_DT, 0, 0, 'datetime', '2018-05-09 12:03:22.0000', {}),
         (CSV_FILE_DT, 0, 0, 'datetime', 'ABC', {'result_value': 'ABC', 'result_type': 'varchar'}),
         (CSV_FILE_DT, 0, 0, 'datetime', None, {}),
     ]
     for source_file, column, row, type_name, value, expected in cases:
         self.update_cell(source_file, column, row, type_name, value, **expected)
     mimir.finalize()
Пример #3
0
 def test_type_inference_lens(self):
     """Test TYPE INFERENCE lens."""
     # Create new work trail and retrieve the HEAD workflow of the default
     # branch
     mimir.initialize()
     f_handle = self.fileserver.upload_file(INCOMPLETE_CSV_FILE)
     vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'})
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.load_dataset(f_handle.identifier, DS_NAME)
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     ds1 = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     self.assertFalse(wf.has_error)
     # Infer type
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_type_inference(DS_NAME, 0.6)
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertFalse(wf.has_error)
     print(wf.modules[-1].command_text.upper())
     self.assertEqual(wf.modules[-1].command_text.upper(), 'TYPE INFERENCE FOR COLUMNS IN ' + DS_NAME.upper() + ' WITH PERCENT_CONFORM = 0.6')
     # Get dataset
     ds2 = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     self.assertEqual(len(ds2.columns), 3)
     self.assertEqual(ds2.row_count, 7)
     ds1_rows = ds1.fetch_rows()
     ds2_rows = ds2.fetch_rows()
     # Type inference must not change any of the cell values
     for i in range(ds2.row_count):
         self.assertEqual(ds1_rows[i].values, ds2_rows[i].values)
     mimir.finalize()
Пример #4
0
 def test_mimir_datastore(self):
     """Run test for Mimir datastore."""
     mimir.initialize()
     store = MIMIR_DATASTORE
     self.run_tests(store)
     # Re-create the environment and exercise the TSV loading path as well.
     self.set_up(store)
     self.load_tsv()
     self.tear_down(store)
     mimir.finalize()
Пример #5
0
 def test_vt_mimir(self):
     """Run workflows for Mimir configurations."""
     # Create new work trail and retrieve the HEAD workflow of the default
     # branch
     # The gateway connection has to be established before any Mimir-backed
     # operation is executed.
     mimir.initialize()
     self.set_up_mimir()
     self.run_workflow()
     # NOTE(review): finalize() is skipped if run_workflow() raises — the
     # gateway connection would then stay open. Confirm whether that matters.
     mimir.finalize()
Пример #6
0
 def test_mimir_client(self):
     """Run tests for default engine and Mimir data store."""
     mimir.initialize()
     # Fresh file server and Mimir-backed data store for this run; both are
     # kept on the instance so helper methods can reach them.
     self.fs = DefaultFileServer(SERVER_DIR)
     self.ds = MimirDataStore(DATASTORE_DIR)
     vizual_engine = DefaultVizualEngine(self.ds, self.fs)
     client = VizierDBClient(self.ds, dict(), vizual_engine)
     self.run_client_tests(client)
     mimir.finalize()
Пример #7
0
def initialize():
    """Initialize the connection to the Mimir gateway if the Mimir execution
    environment is used.

    Initialization is best-effort: if the Mimir package is not installed or
    the gateway cannot be reached, the error is ignored and execution
    continues without Mimir support.
    """
    if ENGINEENV_MIMIR in config.envs:
        try:
            import vistrails.packages.mimir.init as mimir
            mimir.initialize()
        except Exception:
            # Deliberately swallow the error: Mimir is an optional component
            # and a failed initialization must not abort startup.
            pass
Пример #8
0
 def test_geocode_lens(self):
     """Test GEOCODE lens."""
     # Create new work trail and retrieve the HEAD workflow of the default
     # branch
     mimir.initialize()
     f_handle = self.fileserver.upload_file(GEO_FILE)
     vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'})
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.load_dataset(f_handle.identifier, DS_NAME)
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     self.assertFalse(wf.has_error)
     # Geocode Lens with explicit address columns
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_geocode(
             DS_NAME,
             'GOOGLE',
             house_nr=ds.column_by_name('STRNUMBER').identifier,
             street=ds.column_by_name('STRNAME').identifier,
             city=ds.column_by_name('CITY').identifier,
             state=ds.column_by_name('STATE').identifier
         )
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     if wf.has_error:
         print(wf.modules[-1].stderr[0])
     self.assertEqual(wf.modules[-1].command_text.upper(), 'GEOCODE HOUSE_NUMBER=STRNUMBER,STREET=STRNAME,CITY=CITY,STATE=STATE PEOPLE USING GOOGLE')
     self.assertFalse(wf.has_error)
     self.assertEqual(len(wf.modules), 2)
     # Get dataset
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     self.assertEqual(len(ds.columns), 6)
     # Geocode Lens without explicit address columns
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_geocode(
             DS_NAME,
             'GOOGLE'
         )
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     if wf.has_error:
         print(wf.modules[-1].stderr[0])
     self.assertEqual(wf.modules[-1].command_text.upper(), 'GEOCODE PEOPLE USING GOOGLE')
     self.assertFalse(wf.has_error)
     self.assertEqual(len(wf.modules), 3)
     # Get dataset
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     self.assertEqual(len(ds.columns), 8)
     mimir.finalize()
Пример #9
0
 def test_vt_mimir(self):
     """Run workflows for Mimir configurations."""
     # Create new work trail and retrieve the HEAD workflow of the default
     # branch
     mimir.initialize()
     # Every scenario runs against a freshly set-up Mimir environment.
     scenarios = [
         self.run_python_workflow,
         self.run_mixed_workflow,
         self.run_delete_modules,
         self.run_erroneous_workflow,
     ]
     for scenario in scenarios:
         self.set_up_mimir()
         scenario()
     mimir.finalize()
Пример #10
0
 def test_key_repair_lens(self):
     """Test KEY REPAIR lens."""
     # Create new work trail and retrieve the HEAD workflow of the default
     # branch
     mimir.initialize()
     f_handle = self.fileserver.upload_file(KEY_REPAIR_FILE)
     vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'})
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.load_dataset(f_handle.identifier, DS_NAME)
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertFalse(wf.has_error)
     ds1 = self.datastore.get_dataset(wf.modules[0].datasets[DS_NAME])
     # Key Repair Lens
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_key_repair(DS_NAME, ds1.column_by_name('Empid').identifier)
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertFalse(wf.has_error)
     self.assertEqual(wf.modules[-1].command_text.upper(), 'KEY REPAIR FOR EMPID IN ' + DS_NAME.upper())
     # The dataset of the first module must be unchanged by the lens
     ds2 = self.datastore.get_dataset(wf.modules[0].datasets[DS_NAME])
     self.assertEqual(ds1.row_count, ds2.row_count)
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     self.assertEqual(len(ds.columns), 4)
     self.assertEqual(ds.row_count, 2)
     names = set()
     empids = set()
     rowids = set()
     for row in DatasetClient(dataset=ds).rows:
         rowids.add(row.identifier)
         empids.add(int(row.get_value('empid')))
         names.add(row.get_value('name'))
     self.assertTrue(1 in empids)
     self.assertTrue(2 in rowids)
     self.assertTrue('Alice' in names)
     self.assertTrue('Carla' in names)
     # Test error case and command text
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_key_repair('MY DS', 'MY COL')
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertTrue(wf.has_error)
     self.assertEqual(wf.modules[-1].command_text.upper(), 'KEY REPAIR FOR \'MY COL\' IN \'MY DS\'')
     mimir.finalize()
Пример #11
0
 def test_missing_key_lens(self):
     """Test MISSING_KEY lens."""
     # Create new work trail and retrieve the HEAD workflow of the default
     # branch
     mimir.initialize()
     f_handle = self.fileserver.upload_file(INCOMPLETE_CSV_FILE)
     vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'})
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.load_dataset(f_handle.identifier, DS_NAME)
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertFalse(wf.has_error)
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     # Missing Key Lens on the Age column
     age_col = ds.columns[ds.column_index('Age')].identifier
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_missing_key(DS_NAME, age_col, missing_only=True)
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertEqual(wf.modules[-1].command_text.upper(), 'MISSING KEYS FOR AGE IN ' + DS_NAME.upper())
     self.assertFalse(wf.has_error)
     # Get dataset
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     self.assertEqual(len(ds.columns), 3)
     rows = ds.fetch_rows()
     self.assertEqual(len(rows), 24)
     # Stacking a second Missing Key lens on the Salary column
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_missing_key(
             DS_NAME,
             ds.columns[ds.column_index('Salary')].identifier,
             missing_only=True
         )
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertFalse(wf.has_error)
     # Get dataset
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     self.assertEqual(len(ds.columns), 3)
     rows = ds.fetch_rows()
     self.assertEqual(len(rows), 55)
     mimir.finalize()
Пример #12
0
 def test_datastore(self):
     """Test functionality of the file server data store."""
     mimir.initialize()
     ds = self.db.load_dataset(self.fileserver.upload_file(CSV_FILE))
     self.assertEqual(ds.column_counter, 3)
     self.assertEqual(ds.row_counter, 2)
     self.assertEqual(ds.row_count, 2)
     cols = [('NAME', COL_PREFIX + '0', 'varchar'),
             ('AGE', COL_PREFIX + '1', 'int'),
             ('SALARY', COL_PREFIX + '2', 'varchar')]
     control_rows = [(0, ['Alice', 23, '35K']), (1, ['Bob', 32, '30K'])]
     for column in ds.columns:
         self.validate_column(column, cols[column.identifier])
     self.validate_rowid_column(ds.rowid_column)
     self.validate_rows(ds.fetch_rows(), control_rows)
     # Get dataset and repeat tests
     ds = self.db.get_dataset(ds.identifier)
     self.assertEqual(ds.column_counter, 3)
     self.assertEqual(ds.row_counter, 2)
     self.assertEqual(len(ds.row_ids), 2)
     for column in ds.columns:
         self.validate_column(column, cols[column.identifier])
     self.validate_rowid_column(ds.rowid_column)
     self.validate_rows(ds.fetch_rows(), control_rows)
     # Create dataset from modified rows, passed in reverse order
     names = ['NAME', 'AGE', 'SALARY']
     rows = ds.fetch_rows()
     rows[0].values[0] = 'Jane'
     rows = [rows[1], rows[0]]
     ds = self.db.create_dataset(columns=ds.columns, rows=rows)
     ds = self.db.get_dataset(ds.identifier)
     for i in range(3):
         col = ds.columns[i]
         self.assertEqual(col.identifier, i)
         self.assertEqual(col.name, names[i])
     rows = ds.fetch_rows()
     # The input list was reversed before create_dataset, so identifiers run
     # back-to-front over the fetched rows.
     for i, row in enumerate(reversed(rows)):
         self.assertEqual(row.identifier, i)
     self.assertEqual(rows[1].values[0], 'Jane')
     # DONE
     mimir.finalize()
Пример #13
0
 def test_domain_lens(self):
     """Test DOMAIN lens."""
     # Create new work trail and retrieve the HEAD workflow of the default
     # branch
     mimir.initialize()
     f_handle = self.fileserver.upload_file(INCOMPLETE_CSV_FILE)
     vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'})
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.load_dataset(f_handle.identifier, DS_NAME)
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     col_age = ds.column_by_name('Age')
     self.assertFalse(wf.has_error)
     # Domain Lens
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_domain(DS_NAME, col_age.identifier)
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     if wf.has_error:
         print(wf.modules[-1].stderr[0])
     self.assertEqual(wf.modules[-1].command_text.upper(), 'DOMAIN FOR AGE IN PEOPLE')
     self.assertFalse(wf.has_error)
     self.assertEqual(len(wf.modules), 2)
     # Get dataset; the missing Age value must have been filled in
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     rows = ds.fetch_rows()
     self.assertNotEqual(rows[2].values[ds.column_index('Age')], '')
     # Introduce an error. Make sure command formatting is correct
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_domain('MY DS', 'MY COL')
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertTrue(wf.has_error)
     self.assertEqual(wf.modules[-1].command_text.upper(), 'DOMAIN FOR \'MY COL\' IN \'MY DS\'')
     mimir.finalize()
Пример #14
0
 def test_annotations(self):
     """Test cell annotations created by the MISSING VALUE lens."""
     # Create new work trail and create dataset from CSV file
     mimir.initialize()
     f_handle = self.fileserver.upload_file(CSV_FILE)
     vt = self.db.create_viztrail(ENGINE_ID, {'name': 'My Project'})
     self.db.append_workflow_module(viztrail_id=vt.identifier,
                                    command=cmd.load_dataset(
                                        f_handle.identifier, DS_NAME))
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     # Missing Value Lens
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_missing_value(
             DS_NAME,
             ds.column_by_name('AGE').identifier))
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     # The repaired cell is expected to carry uncertainty annotations
     annos = ds.get_annotations(column_id=1, row_id=2)
     self.assertEqual(len(annos), 2)
     for anno in annos:
         self.assertEqual(anno.key, ANNO_UNCERTAIN)
     mimir.finalize()
Пример #15
0
                                         row_ids[row_index]).set_annotation(
                                             'mimir:uncertain', 'true')
    return annotations


def get_tempfile():
    """Return the path to a temporary CSV file. Try to get a unique name to
    avoid problems with existing datasets.

    Returns
    -------
    string
    """
    tmp_prefix = 'DS_' + get_unique_identifier()
    # mkstemp() returns an open file descriptor together with the path. Close
    # the descriptor to avoid leaking it; the caller only needs the path.
    fd, path = tempfile.mkstemp(suffix='.csv', prefix=tmp_prefix)
    os.close(fd)
    return path


CSV_FILE = './dataset_load_test.csv'
#CSV_FILE = '../data/dataset.csv'
#CSV_FILE = './reload_dataset.csv'

mimir.initialize()

# Load the dataset and print its column names.
ds = load_dataset(os.path.abspath(CSV_FILE))
print([col.name for col in ds.columns])
# Touch the annotations of every cell to exercise the annotation lookup;
# the returned values are intentionally discarded.
for row_id in ds.row_ids:
    for col in ds.columns:
        anno = ds.annotations.for_cell(col.identifier, row_id)

mimir.finalize()
Пример #16
0
 def test_mimir_engine(self):
     """Test functionality of the Mimir VizUAL engine."""
     # Local import: the Mimir gateway package is only needed by this test.
     import vistrails.packages.mimir.init as mimir
     mimir.initialize()
     # Run the shared engine test suite against the Mimir environment.
     self.run_engine_tests(ENGINEENV_MIMIR)
     mimir.finalize()
Пример #17
0
 def test_missing_value_lens(self):
     """Test MISSING_VALUE lens."""
     # Create new work trail and retrieve the HEAD workflow of the default
     # branch
     mimir.initialize()
     f_handle = self.fileserver.upload_file(INCOMPLETE_CSV_FILE)
     vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'})
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.load_dataset(f_handle.identifier, DS_NAME)
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     self.assertFalse(wf.has_error)
     # Missing Value Lens
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_missing_value(DS_NAME, ds.column_by_name('AGE').identifier)
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertFalse(wf.has_error)
     self.assertEqual(wf.modules[-1].command_text.upper(), 'MISSING VALUES FOR AGE IN ' + DS_NAME.upper())
     self.assertEqual(len(wf.modules), 2)
     # Get dataset; the missing Age value must have been filled in
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     rows = ds.fetch_rows()
     self.assertNotEqual(rows[2].values[ds.column_index('Age')], '')
     # Annotations
     annotations = ds.get_annotations(column_id=1, row_id=4)
     self.assertEqual(len(annotations), 2)
     # MISSING VALUE Lens with value constraint
     vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'New Project'})
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.load_dataset(f_handle.identifier, DS_NAME)
     )
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_missing_value(
             DS_NAME,
             ds.column_by_name('AGE').identifier,
             constraint='> 30')
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     if wf.has_error:
         print(wf.modules[-1].stderr[0])
     self.assertFalse(wf.has_error)
     self.assertEqual(wf.modules[-1].command_text.upper(), 'MISSING VALUES FOR AGE IN ' + DS_NAME.upper() + ' WITH CONSTRAINT > 30')
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     rows = ds.fetch_rows()
     self.assertTrue(rows[2].values[ds.column_index('Age')] > 30)
     # Command text in case of error
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_missing_value('MY DS', '?', constraint='A B')
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertTrue(wf.has_error)
     cmd_text = wf.modules[-1].command_text.upper()
     expected_text = 'MISSING VALUES FOR ? IN \'MY DS\'' + ' WITH CONSTRAINT A B'
     self.assertEqual(cmd_text, expected_text)
     mimir.finalize()
Пример #18
0
 def test_picker_lens(self):
     """Test PICKER lens."""
     # Create new work trail and retrieve the HEAD workflow of the default
     # branch
     mimir.initialize()
     f_handle = self.fileserver.upload_file(PICKER_FILE)
     vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'})
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.load_dataset(f_handle.identifier, DS_NAME)
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertFalse(wf.has_error)
     # Picker Lens with default output column name
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_picker(DS_NAME, [
             {'pickFrom': ds.column_by_name('Age').identifier},
             {'pickFrom': ds.column_by_name('Salary').identifier}
         ])
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     if wf.modules[-1].has_error:
         print(wf.modules[-1].stderr)
     self.assertFalse(wf.has_error)
     self.assertEqual(wf.modules[-1].command_text.upper(), 'PICK FROM AGE,SALARY IN ' + DS_NAME.upper())
     # Get dataset
     self.assertEqual(len(wf.modules[-1].datasets), 1)
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     columns = [c.name for c in ds.columns]
     self.assertEqual(len(ds.columns), 5)
     self.assertTrue('PICK_ONE_AGE_SALARY' in columns)
     # Pick another column, this time with custom name
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_picker(DS_NAME, [
             {'pickFrom': ds.column_by_name('Age').identifier},
             {'pickFrom': ds.column_by_name('Salary').identifier}
         ],
         pick_as='My Column')
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertFalse(wf.has_error)
     # Get dataset
     self.assertEqual(len(wf.modules[-1].datasets), 1)
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     columns = [c.name for c in ds.columns]
     self.assertEqual(len(ds.columns), 6)
     self.assertTrue('PICK_ONE_AGE_SALARY' in columns)
     self.assertTrue('My Column' in columns)
     # Pick from a picked column
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_picker(DS_NAME, [
             {'pickFrom': ds.column_by_name('Age').identifier},
             {'pickFrom': ds.column_by_name('PICK_ONE_AGE_SALARY').identifier}
         ],
         pick_as='My Column')
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     if wf.modules[-1].has_error:
         print(wf.modules[-1].stderr)
     self.assertFalse(wf.has_error)
     self.assertEqual(wf.modules[-1].command_text.upper(), 'PICK FROM AGE,PICK_ONE_AGE_SALARY AS \'MY COLUMN\' IN ' + DS_NAME.upper())
     ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
     mimir.finalize()
Пример #19
0
 def test_schema_matching_lens(self):
     """Test SCHEMA_MATCHING lens."""
     # Create new work trail and retrieve the HEAD workflow of the default
     # branch
     mimir.initialize()
     f_handle = self.fileserver.upload_file(CSV_FILE)
     vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'})
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.load_dataset(f_handle.identifier, DS_NAME)
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertFalse(wf.has_error)
     # Schema Matching Lens
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_schema_matching(DS_NAME, [
             {'column': 'BDate', 'type': 'int'},
             {'column': 'PName', 'type': 'varchar'}
         ], 'new_' + DS_NAME)
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertFalse(wf.has_error)
     self.assertEqual(wf.modules[-1].command_text.upper(), 'SCHEMA MATCHING PEOPLE (BDATE INT, PNAME VARCHAR) AS NEW_' + DS_NAME.upper())
     # The lens creates a second dataset next to the source dataset
     self.assertEqual(len(wf.modules[-1].datasets), 2)
     ds = self.datastore.get_dataset(wf.modules[-1].datasets['new_' + DS_NAME])
     self.assertEqual(len(ds.columns), 2)
     self.assertEqual(ds.row_count, 2)
     # Error if adding an existing dataset
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_schema_matching(
             DS_NAME,
             [{'column': 'BDate', 'type': 'int'}],
             'new_' + DS_NAME
         )
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertTrue(wf.has_error)
     # Replacing the failed module with a fresh output name must succeed
     self.db.replace_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_schema_matching(
             DS_NAME,
             [{'column': 'BDate', 'type': 'int'}],
             'a_new_' + DS_NAME
         ),
         module_id=wf.modules[-1].identifier,
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertFalse(wf.has_error)
     self.assertEqual(wf.modules[-1].command_text.upper(), 'SCHEMA MATCHING PEOPLE (BDATE INT) AS A_NEW_' + DS_NAME.upper())
     # Error when adding a dataset with an invalid name
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=cmd.mimir_schema_matching(
             DS_NAME,
             [{'column': 'BDate', 'type': 'int'}],
             'SOME NAME'
         )
     )
     wf = self.db.get_workflow(viztrail_id=vt.identifier)
     self.assertTrue(wf.has_error)
     self.assertEqual(wf.modules[-1].command_text.upper(), 'SCHEMA MATCHING PEOPLE (BDATE INT) AS \'SOME NAME\'')
     mimir.finalize()