 def test_load_dataset(self):
     """Test validation of the load dataset command."""
     vizual.load_dataset(dataset_name='ABC',
                         file={pckg.FILE_ID: '493ewkfj485ufjw490feofj'},
                         validate=True)
     vizual.load_dataset(dataset_name='ABC',
                         file={pckg.FILE_ID: '493ewkfj485ufjw490feofj'},
                         load_format='json',
                         detect_headers=True,
                         infer_types=True,
                         validate=True)
 def test_skip_modules(self):
     """Test replacing a module in a workflow where dome cells do not
     require to be re-executed because they access a different dataset.
     """
     project = self.engine.projects.create_project()
     branch_id = project.get_default_branch().identifier
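     # Upload the data file twice so that each load command has its own
     # file handle.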
     fh1 = project.filestore.upload_file(CSV_FILE)
     fh2 = project.filestore.upload_file(CSV_FILE)
     self.engine.append_workflow_module(
         project_id=project.identifier,
         branch_id=branch_id,
         command=load_dataset(dataset_name=DATASET_NAME,
                              file={pckg.FILE_ID: fh1.identifier}))
     self.engine.append_workflow_module(
         project_id=project.identifier,
         branch_id=branch_id,
         command=load_dataset(dataset_name=SECOND_DATASET_NAME,
                              file={pckg.FILE_ID: fh2.identifier}))
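     # Append ten Python cells that alternate between updating the first
     # and the second dataset.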
     for i in range(10):
         if i % 2 == 0:
             cmd = python_cell(PY_ADD_ONE)
         else:
             cmd = python_cell(PY_ADD_SECOND)
         self.engine.append_workflow_module(project_id=project.identifier,
                                            branch_id=branch_id,
                                            command=cmd)
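     # Wait for the workflow to finish executing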
     while project.viztrail.default_branch.head.is_active:
         time.sleep(0.1)
     wf = project.viztrail.default_branch.head
     self.assertTrue(wf.get_state().is_success)
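     # Snapshot the dataset identifiers seen by the modules that follow
     # the cell that will be replaced below.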
     datasets = [module.datasets for module in wf.modules[4:]]
     self.assert_module_count_is(project, 12)
     # Replace a module that updates the first dataset. All modules that
     # access the second dataset should remain unchanged.
     cmd = python_cell(PY_ADD_TEN)
     self.engine.replace_workflow_module(project_id=project.identifier,
                                         branch_id=branch_id,
                                         module_id=wf.modules[4].identifier,
                                         command=cmd)
     while project.viztrail.default_branch.head.is_active:
         time.sleep(0.1)
     wf = project.viztrail.default_branch.head
     self.assertTrue(wf.get_state().is_success)
     for i, module in enumerate(wf.modules[4:]):
         self.assertNotEqual(datasets[i][DATASET_NAME].identifier,
                             module.datasets[DATASET_NAME].identifier)
         self.assertEqual(datasets[i][SECOND_DATASET_NAME].identifier,
                          module.datasets[SECOND_DATASET_NAME].identifier)
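The busy-wait idiom above (poll head.is_active, then sleep) recurs in nearly
every example below. A shared helper could keep the tests uniform; the
following is only a sketch, and the timeout guard is an addition that is not
part of the original tests:

import time

def wait_for_head(project, poll_interval=0.1, timeout=30.0):
    """Block until the head workflow of the default branch is inactive.

    Raises TimeoutError if the workflow does not settle within `timeout`
    seconds.
    """
    deadline = time.time() + timeout
    while project.viztrail.default_branch.head.is_active:
        if time.time() > deadline:
            raise TimeoutError('workflow did not finish in time')
        time.sleep(poll_interval)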
Example #3
 def test_create_api_from_dictionary(self):
     """Test creating the processor instance with properties parameter
     instead of api.
     """
     processor = VizualTaskProcessor(
         properties={
             PROPERTY_API:
             ClassLoader.to_dict(
                 module_name='vizier.engine.packages.vizual.api.fs',
                 class_name='DefaultVizualApi')
         })
     fh = self.filestore.upload_file(CSV_FILE)
     cmd = vizual.load_dataset(dataset_name=DATASET_NAME,
                               file={pckg.FILE_ID: fh.identifier},
                               validate=True)
     result = processor.compute(command_id=cmd.command_id,
                                arguments=cmd.arguments,
                                context=TaskContext(
                                    project_id=5,
                                    datastore=self.datastore,
                                    filestore=self.filestore,
                                    artifacts={}))
     self.assertIsNotNone(result.provenance.write)
     self.assertTrue(DATASET_NAME in result.provenance.write)
     dataset_id = result.provenance.write[DATASET_NAME].identifier
     self.assertTrue(result.provenance.read is None
                     or len(result.provenance.read) == 0)
     self.assertIsNotNone(result.provenance.resources)
     self.assertEqual(result.provenance.resources[RESOURCE_DATASET],
                      dataset_id)
Example #4
 def test_load_dataset(self):
     """Test functionality to load a dataset."""
     # Create a new dataset
     fh = self.filestore.upload_file(CSV_FILE)
     cmd = vizual.load_dataset(dataset_name='ABC',
                               file={pckg.FILE_ID: fh.identifier},
                               validate=True)
     result = self.processor.compute(command_id=cmd.command_id,
                                     arguments=cmd.arguments,
                                     context=TaskContext(
                                         project_id=5,
                                         datastore=self.datastore,
                                         filestore=self.filestore,
                                         artifacts={}))
     self.assertIsNotNone(result.provenance.write)
     self.assertTrue('abc' in result.provenance.write)
     dataset_id = result.provenance.write['abc'].identifier
     self.assertTrue(result.provenance.read is None
                     or len(result.provenance.read) == 0)
     self.assertIsNotNone(result.provenance.resources)
     self.assertEqual(result.provenance.resources[RESOURCE_DATASET],
                      dataset_id)
     # Running load again will not change the dataset identifier
     result = self.processor.compute(
         command_id=cmd.command_id,
         arguments=cmd.arguments,
         context=TaskContext(project_id=5,
                             datastore=self.datastore,
                             filestore=self.filestore,
                             artifacts={},
                             resources=result.provenance.resources))
     self.assertEqual(result.provenance.write['abc'].identifier, dataset_id)
     self.assertEqual(result.provenance.resources[RESOURCE_DATASET],
                      dataset_id)
 def test_can_execute(self):
     """Test the can execute method with different commands."""
     self.assertTrue(
         self.backend.can_execute(
             vizual.load_dataset(dataset_name=DATASET_NAME,
                                 file={pckg.FILE_ID: '000'},
                                 validate=True)))
     self.assertTrue(
         self.backend.can_execute(
             vizual.update_cell(dataset_name=DATASET_NAME,
                                column=1,
                                row=0,
                                value=9,
                                validate=True)))
     self.assertTrue(
         self.backend.can_execute(
             pycell.python_cell(source=CREATE_DATASET_PY, validate=True)))
     self.assertFalse(
         self.backend.can_execute(
             vizual.insert_row(dataset_name=DATASET_NAME,
                               position=1,
                               validate=True)))
     self.assertFalse(
         self.backend.can_execute(
             vizual.drop_dataset(dataset_name=DATASET_NAME, validate=True)))
 def test_execute_with_error(self):
     """Test running a sequence of statements where we (potentially)append to
     a workflow that in in error state.
     """
     project = self.engine.projects.create_project()
     branch_id = project.get_default_branch().identifier
     fh = project.filestore.upload_file(CSV_FILE)
     cmd = load_dataset(dataset_name=DATASET_NAME,
                        file={pckg.FILE_ID: fh.identifier})
     self.engine.append_workflow_module(project_id=project.identifier,
                                        branch_id=branch_id,
                                        command=cmd)
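     # Append twenty Python cells that all raise an error; the first of
     # them fails and every following module should be canceled.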
     for i in range(20):
         cmd = python_cell(PY_ADD_ONE_ERROR)
         self.engine.append_workflow_module(project_id=project.identifier,
                                            branch_id=branch_id,
                                            command=cmd)
     while project.viztrail.default_branch.head.is_active:
         time.sleep(0.1)
     wf = project.viztrail.default_branch.head
     self.assertIsNotNone(wf)
     # The second module will raise an error. All following modules should
     # be canceled
     for i in range(20):
         module = wf.modules[i]
         if i == 0:
             self.assertTrue(module.is_success)
         elif i == 1:
             self.assertTrue(module.is_error)
         else:
             self.assertTrue(module.is_canceled)
Example #7
 def create_workflow(self, project):
     """Create a completed workflow by loading the data file and updating the
     age value of the first row ten times.
     """
     branch_id = project.viztrail.default_branch.identifier
     fh = project.filestore.upload_file(CSV_FILE)
     cmd = load_dataset(
         dataset_name=DATASET_NAME,
         file={pckg.FILE_ID: fh.identifier}
     )
     self.engine.append_workflow_module(
         project_id=project.identifier,
         branch_id=branch_id,
         command=cmd
     )
     for i in range(10):
         cmd = python_cell(PY_ADD_ONE)
         self.engine.append_workflow_module(
             project_id=project.identifier,
             branch_id=branch_id,
             command=cmd
         )
     while project.viztrail.default_branch.head.is_active:
         time.sleep(0.1)
     for module in project.viztrail.default_branch.head.modules:
         # print("--------=======--------")
         # print(module.command)
         # print(module.outputs)
         # print(module.provenance)
         if not module.is_success:
             print(module.outputs)
         self.assertTrue(module.is_success)
         self.assertTrue(DATASET_NAME in module.provenance.write)
     return branch_id
 def test_execute(self):
     """Test executing a sequence of supported commands."""
     context = dict()
     fh = self.backend.projects.get_project(
         self.PROJECT_ID).filestore.upload_file(CSV_FILE)
     cmd = vizual.load_dataset(dataset_name=DATASET_NAME,
                               file={pckg.FILE_ID: fh.identifier},
                               validate=True)
     result = self.backend.execute(task=TaskHandle(
         task_id='000', project_id=self.PROJECT_ID),
                                   command=cmd,
                                   context=context)
     self.assertTrue(result.is_success)
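     # Derive the database state after the load; each later command chains
     # its provenance onto the previous state.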
     state = result.provenance.get_database_state(prev_state=dict())
     context = task_context(state)
     cmd = vizual.update_cell(dataset_name=DATASET_NAME,
                              column=1,
                              row=0,
                              value=9,
                              validate=True)
     result = self.backend.execute(task=TaskHandle(
         task_id='000', project_id=self.PROJECT_ID),
                                   command=cmd,
                                   context=context)
     self.assertTrue(result.is_success)
     state = result.provenance.get_database_state(prev_state=state)
     self.assertNotEqual(context[DATASET_NAME],
                         task_context(state)[DATASET_NAME])
     context = task_context(state)
     cmd = pycell.python_cell(source=CREATE_DATASET_PY, validate=True)
     result = self.backend.execute(task=TaskHandle(
         task_id='000', project_id=self.PROJECT_ID),
                                   command=cmd,
                                   context=context)
     self.assertTrue(result.is_success)
     state = result.provenance.get_database_state(prev_state=state)
     self.assertTrue(SECOND_DATASET_NAME in state)
     self.assertEqual(context[DATASET_NAME],
                      task_context(state)[DATASET_NAME])
     context = task_context(state)
     cmd = vizual.update_cell(dataset_name=SECOND_DATASET_NAME,
                              column=1,
                              row=0,
                              value=9,
                              validate=True)
     result = self.backend.execute(task=TaskHandle(
         task_id='000', project_id=self.PROJECT_ID),
                                   command=cmd,
                                   context=context)
     self.assertTrue(result.is_success)
     state = result.provenance.get_database_state(prev_state=state)
     self.assertEqual(context[DATASET_NAME],
                      task_context(state)[DATASET_NAME])
     self.assertNotEqual(context[SECOND_DATASET_NAME],
                         task_context(state)[SECOND_DATASET_NAME])
 def test_workflow(self):
     """Run workflows for Mimir configurations."""
     # Create new work trail and retrieve the HEAD workflow of the default
     # branch
     project = self.engine.projects.create_project()
     branch_id = project.viztrail.default_branch.identifier
     fh = project.filestore.upload_file(CSV_FILE)
     cmd = load_dataset(dataset_name=DATASET_PEOPLE,
                        file={
                            pckg.FILE_ID: fh.identifier,
                            pckg.FILE_NAME: os.path.basename(CSV_FILE)
                        },
                        infer_types=True)
     self.engine.append_workflow_module(project_id=project.identifier,
                                        branch_id=branch_id,
                                        command=cmd)
     cmd = python_cell(PY_ADD_ONE)
     self.engine.append_workflow_module(project_id=project.identifier,
                                        branch_id=branch_id,
                                        command=cmd)
     wf = project.viztrail.default_branch.head
     while project.viztrail.default_branch.head.is_active:
         time.sleep(0.1)
     for m in wf.modules:
         print(m)
         self.assertTrue(m.is_success)
     cmd = python_cell(CREATE_DATASET_PY)
     self.engine.insert_workflow_module(
         project_id=project.identifier,
         branch_id=branch_id,
         before_module_id=wf.modules[0].identifier,
         command=cmd)
     wf = project.viztrail.default_branch.head
     while project.viztrail.default_branch.head.is_active:
         time.sleep(0.1)
     for m in wf.modules:
         print(m)
         self.assertTrue(m.is_success)
     datasets = wf.modules[0].provenance.write
     self.assertTrue(DATASET_FRIENDS in datasets)
     self.assertFalse(DATASET_PEOPLE in datasets)
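     # Replay each module's provenance to reconstruct the database state
     # at the end of the workflow.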
     for m in wf.modules[1:]:
         datasets = m.provenance.get_database_state(datasets)
         self.assertTrue(DATASET_FRIENDS in datasets)
         self.assertTrue(DATASET_PEOPLE in datasets)
     ds = project.datastore.get_dataset(datasets[DATASET_PEOPLE].identifier)
     rows = ds.fetch_rows()
     self.assertEqual(rows[0].values, ['Alice', 24])
     self.assertEqual(rows[1].values, ['Bob', 32])
     ds = project.datastore.get_dataset(
         datasets[DATASET_FRIENDS].identifier)
     rows = ds.fetch_rows()
     self.assertEqual(rows[0].values, ['Yonder', 23])
     self.assertEqual(rows[1].values, ['Zoe', 34])
Example #10
 def load_dataset(self):
     """Load a single dataset and return the resulting database state."""
     fh = self.filestore.upload_file(CSV_FILE)
     cmd = vizual.load_dataset(dataset_name=DATASET_NAME,
                               file={pckg.FILE_ID: fh.identifier},
                               validate=True)
     result = self.processor.compute(command_id=cmd.command_id,
                                     arguments=cmd.arguments,
                                     context=TaskContext(
                                         datastore=self.datastore,
                                         filestore=self.filestore))
     return result.provenance.write
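The returned mapping associates dataset names with their handles, matching
the result.provenance.write lookups in the examples above. A minimal usage
sketch inside a test of the same class (purely illustrative):

# Illustrative only: look up the identifier of the freshly loaded dataset.
state = self.load_dataset()
dataset_id = state[DATASET_NAME].identifier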
Example #11
 def test_load_dataset(self):
     """Test validation of load dataset command."""
     db = FileSystemFilestore(SERVER_DIR)
     fh = db.upload_file(CSV_FILE)
     cmd = load_dataset(dataset_name='ds',
                        file={
                            pckg.FILE_ID: fh.identifier,
                            pckg.FILE_NAME: fh.file_name
                        },
                        validate=True).to_external_form(command=PACKAGE.get(
                            vizual.VIZUAL_LOAD),
                                                        datasets=DATASETS)
     self.assertEqual(cmd, 'LOAD DATASET ds FROM ' + fh.file_name)
     cmd = load_dataset(dataset_name='ds',
                        file={
                            pckg.FILE_URL: 'http://some.file.url'
                        },
                        validate=True).to_external_form(command=PACKAGE.get(
                            vizual.VIZUAL_LOAD),
                                                        datasets=DATASETS)
     self.assertEqual(cmd, 'LOAD DATASET ds FROM http://some.file.url')
     cmd = load_dataset(dataset_name='ds',
                        file={
                            pckg.FILE_ID: fh.identifier,
                            pckg.FILE_URL: 'http://some.file.url'
                        },
                        validate=True).to_external_form(command=PACKAGE.get(
                            vizual.VIZUAL_LOAD),
                                                        datasets=DATASETS)
     self.assertEqual(cmd, 'LOAD DATASET ds FROM http://some.file.url')
     cmd = load_dataset(dataset_name='ds',
                        file={
                            pckg.FILE_ID: 'Some File'
                        },
                        validate=True).to_external_form(command=PACKAGE.get(
                            vizual.VIZUAL_LOAD),
                                                        datasets=DATASETS)
     self.assertEqual(cmd, 'LOAD DATASET ds FROM \'Some File\'')
 def create_workflow(self, project):
     """Create a completed workflow by loading the data file and updating the
     age value of the first row ten times.
     """
     branch_id = project.viztrail.default_branch.identifier
     fh = project.filestore.upload_file(CSV_FILE)
     cmd = load_dataset(dataset_name=DATASET_NAME,
                        file={pckg.FILE_ID: fh.identifier})
     self.engine.append_workflow_module(project_id=project.identifier,
                                        branch_id=branch_id,
                                        command=cmd)
     for i in range(10):
         cmd = python_cell(PY_ADD_ONE)
         self.engine.append_workflow_module(project_id=project.identifier,
                                            branch_id=branch_id,
                                            command=cmd)
     while project.viztrail.default_branch.head.is_active:
         time.sleep(0.1)
     return branch_id
Example #13
 def test_create_synchronous_workflow(self):
     """Create workflow by appending a sequence of modules that are executed
     synchronously.
     """
     project = self.engine.projects.create_project()
     # MODULE 1
     # --------
     # LOAD people
     fh = project.filestore.upload_file(PEOPLE_FILE)
     module = self.engine.append_workflow_module(
         project_id=project.identifier,
         branch_id=project.get_default_branch().identifier,
         command=load_dataset(dataset_name='people',
                              file={pckg.FILE_ID: fh.identifier},
                              validate=True))
     self.assertTrue(module.is_success)
     self.assertTrue('people' in module.provenance.write)
     self.assertEqual(len(module.provenance.write['people'].columns), 2)
     # MODULE 2
     # --------
     # UPDATE CELL
     module = self.engine.append_workflow_module(
         project_id=project.identifier,
         branch_id=project.get_default_branch().identifier,
         command=update_cell(dataset_name='people',
                             column=1,
                             row=0,
                             value='42',
                             validate=True))
     print("STATUS: {}".format(module))
     self.assertTrue(module.is_success)
     self.assertTrue('people' in module.provenance.write)
     # MODULE 3
     # --------
     # LOAD employee
     fh = project.filestore.upload_file(EMPLOYEE_FILE)
     module = self.engine.append_workflow_module(
         project_id=project.identifier,
         branch_id=project.get_default_branch().identifier,
         command=load_dataset(dataset_name='employee',
                              file={pckg.FILE_ID: fh.identifier},
                              validate=True))
     self.assertTrue(module.is_success)
     self.assertFalse('people' in module.provenance.write)
     self.assertTrue('employee' in module.provenance.write)
     #
     # Reload engine and check the module states
     #
     self.engine = get_engine(AppConfig())
     project = self.engine.projects.get_project(project.identifier)
     modules = project.get_default_branch().get_head().modules
     self.assertEqual(len(modules), 3)
     for m in modules:
         self.assertTrue(m.is_success)
         self.assertIsNotNone(m.timestamp.created_at)
         self.assertIsNotNone(m.timestamp.started_at)
         self.assertIsNotNone(m.timestamp.finished_at)
         self.assertIsNotNone(m.provenance.write)
     self.assertTrue('people' in modules[0].provenance.write)
     self.assertTrue('employee' in modules[-1].provenance.write)
     self.assertNotEqual(modules[0].provenance.write['people'].identifier,
                         modules[1].provenance.write['people'].identifier)
Example #14
 def test_create_synchronous_workflow_with_errors(self):
     """Create workflow by appending a sequence of modules that are executed
     synchronously.
     """
     project = self.engine.projects.create_project()
     # MODULE 1
     # --------
     # LOAD people
     fh = project.filestore.upload_file(PEOPLE_FILE)
     module = self.engine.append_workflow_module(
         project_id=project.identifier,
         branch_id=project.get_default_branch().identifier,
         command=load_dataset(dataset_name='people',
                              file={
                                  pckg.FILE_ID: fh.identifier,
                                  pckg.FILE_NAME:
                                  os.path.basename(PEOPLE_FILE)
                              },
                              validate=True))
     project = self.engine.projects.get_project(project.identifier)
     modules = project.get_default_branch().get_head().modules
     for m in modules:
         print(m)
         self.assertTrue(m.is_success)
     # MODULE 2
     # --------
     # UPDATE CELL
     module = self.engine.append_workflow_module(
         project_id=project.identifier,
         branch_id=project.get_default_branch().identifier,
         command=update_cell(dataset_name='employee',
                             column=1,
                             row=0,
                             value='42',
                             validate=True))
     self.assertTrue(module.is_error)
     # MODULE 2
     # --------
     # INSERT employee
     fh = project.filestore.upload_file(EMPLOYEE_FILE)
     result = self.engine.insert_workflow_module(
         project_id=project.identifier,
         branch_id=project.get_default_branch().identifier,
         before_module_id=module.identifier,
         command=load_dataset(dataset_name='employee',
                              file={
                                  pckg.FILE_ID:
                                  fh.identifier,
                                  pckg.FILE_NAME:
                                  os.path.basename(EMPLOYEE_FILE)
                              },
                              validate=True))
     self.assertEqual(len(result), 2)
     # Wait for the operations to finish
     while project.viztrail.default_branch.head.is_active:
         time.sleep(0.1)
     #
     # Reload engine and check the module states
     #
     self.engine = get_engine(AppConfig())
     project = self.engine.projects.get_project(project.identifier)
     modules = project.get_default_branch().get_head().modules
     for m in modules:
         print(m)
         self.assertTrue(m.is_success)
     # MODULE 1
     # --------
     # UPDATE CELL
     module = self.engine.insert_workflow_module(
         project_id=project.identifier,
         branch_id=project.get_default_branch().identifier,
         before_module_id=modules[0].identifier,
         command=update_cell(dataset_name='friends',
                             column=1,
                             row=0,
                             value='43',
                             validate=True))
     # Wait for the operations to finish
     while project.viztrail.default_branch.head.is_active:
         time.sleep(0.1)
     modules = project.get_default_branch().get_head().modules
     self.assertEqual(len(modules), 4)
     self.assertTrue(modules[0].is_error)
     for m in modules[1:]:
         self.assertTrue(m.is_canceled)
     # MODULE 1
     # --------
     # INSERT friends
     fh = project.filestore.upload_file(EMPLOYEE_FILE)
     result = self.engine.insert_workflow_module(
         project_id=project.identifier,
         branch_id=project.get_default_branch().identifier,
         before_module_id=modules[0].identifier,
         command=load_dataset(dataset_name='friends',
                              file={
                                  pckg.FILE_ID:
                                  fh.identifier,
                                  pckg.FILE_NAME:
                                  os.path.basename(EMPLOYEE_FILE)
                              },
                              validate=True))
     self.assertEqual(len(result), 5)
     # Wait for the operations to finish
     while project.viztrail.default_branch.head.is_active:
         time.sleep(0.1)
     modules = project.get_default_branch().get_head().modules
     self.assertEqual(len(modules), 5)
     for m in modules:
         self.assertTrue(m.is_success)
     self.assertEqual(len(modules[0].provenance.write['friends'].columns),
                      3)
     # REPLACE MODULE 1
     # ----------------
     # Load the people dataset instead of employee
     fh = project.filestore.upload_file(PEOPLE_FILE)
     result = self.engine.replace_workflow_module(
         project_id=project.identifier,
         branch_id=project.get_default_branch().identifier,
         module_id=modules[0].identifier,
         command=load_dataset(dataset_name='friends',
                              file={
                                  pckg.FILE_ID: fh.identifier,
                                  pckg.FILE_NAME:
                                  os.path.basename(PEOPLE_FILE)
                              },
                              validate=True))
     self.assertEqual(len(result), 5)
     # Wait for the operations to finish
     while project.viztrail.default_branch.head.is_active:
         time.sleep(0.1)
     modules = project.get_default_branch().get_head().modules
     self.assertEqual(len(modules), 5)
     for m in modules:
         self.assertTrue(m.is_success)
     self.assertEqual(len(modules[0].provenance.write['friends'].columns),
                      2)
     ds = project.datastore.get_dataset(
         modules[0].provenance.write['friends'].identifier)
     self.assertEqual(ds.fetch_rows()[0].values[1], 23)
     #
     # Reload engine and check the module states
     #
     self.engine = get_engine(AppConfig())
     project = self.engine.projects.get_project(project.identifier)
     modules = project.get_default_branch().get_head().modules
     self.assertEqual(len(modules), 5)
     for m in modules:
         self.assertTrue(m.is_success)
     self.assertEqual(len(modules[0].provenance.write['friends'].columns),
                      2)
     ds = project.datastore.get_dataset(
         modules[0].provenance.write['friends'].identifier)
     self.assertEqual(ds.fetch_rows()[0].values[1], 23)
Example #15
def parse_command(tokens, notebook, datasets=dict()):
    """Parse command line tokens that represent a notebook cell command. The
    command is parse againts the given notebook state. Returns the module
    command or None if the token list does not specify a valid command.

    The function has side effects in case a dataset is loaded from local file.
    In this case the file is uploaded before the command object is returned.

    Parameters
    ----------
    tokens: list(string)
        Command line tokens specifying the command
    notebook: vizier.api.client.resources.notebook.Notebook
        Current notebook state
    datasets: dict, optional
        Mapping of available dataset names to dataset identifiers

    Returns
    -------
    vizier.engine.module.command.ModuleCommand
    """
    if len(tokens) == 2:
        if tokens[0] == 'python':
            return pycell.python_cell(source=get_script(tokens[1]))
    elif len(tokens) == 3:
        if tokens[0:2] == ['drop', 'dataset']:
            # drop dataset <dataset>
            dataset_name = tokens[2].lower()
            if dataset_name not in datasets:
                raise ValueError('unknown dataset \'' + dataset_name + '\'')
            return vizual.drop_dataset(dataset_name=dataset_name)
    elif len(tokens) >= 4 and tokens[0] == 'filter' and tokens[-2] == 'from':
        # filter <column-1>{::<new-name>} ... from <dataset>
        dataset_name = tokens[-1].lower()
        if dataset_name not in datasets:
            raise ValueError('unknown dataset \'' + dataset_name + '\'')
        ds = notebook.get_dataset(datasets[dataset_name])
        columns = list()
        for col_spec in tokens[1:-2]:
            if '::' in col_spec:
                col = ds.get_column(col_spec[:col_spec.find('::')])
                new_name = col_spec[col_spec.find('::') + 2:]
            else:
                col = ds.get_column(col_spec)
                new_name = None
            entry = {'column': col.identifier}
            if new_name is not None:
                entry['name'] = new_name
            columns.append(entry)
        return vizual.projection(dataset_name=dataset_name, columns=columns)
    elif len(tokens) >= 4 and tokens[0] == 'sort' and tokens[2] == 'by':
        # sort <dataset> by <column-1>{::[DESC|ASC]} ...
        dataset_name = tokens[1].lower()
        if dataset_name not in datasets:
            raise ValueError('unknown dataset \'' + dataset_name + '\'')
        ds = notebook.get_dataset(datasets[dataset_name])
        columns = list()
        for sort_spec in tokens[3:]:
            if '::' in sort_spec:
                col = ds.get_column(sort_spec[:sort_spec.find('::')])
                sort_order = sort_spec[sort_spec.find('::') + 2:].lower()
                sort_order = sort.SORT_ASC if sort_order == 'asc' else sort.SORT_DESC
            else:
                col = ds.get_column(sort_spec)
                sort_order = sort.SORT_ASC
            columns.append({'column': col.identifier, 'order': sort_order})
        return vizual.sort_dataset(dataset_name=dataset_name, columns=columns)
    elif len(tokens) == 5:
        if tokens[0:2] == ['delete', 'column'] and tokens[3] == 'from':
            # delete column <name> from <dataset>
            dataset_name = tokens[4].lower()
            if dataset_name not in datasets:
                raise ValueError('unknown dataset \'' + dataset_name + '\'')
            # Get the referenced dataset and column from the current notebook
            # state
            ds = notebook.get_dataset(datasets[dataset_name])
            col = ds.get_column(tokens[2])
            return vizual.delete_column(dataset_name=dataset_name,
                                        column=col.identifier)
        elif tokens[0:2] == ['delete', 'row'] and tokens[3] == 'from':
            # delete row <row-index> from <dataset>
            dataset_name = tokens[4].lower()
            if dataset_name not in datasets:
                raise ValueError('unknown dataset \'' + dataset_name + '\'')
            return vizual.delete_row(dataset_name=dataset_name,
                                     row=int(tokens[2]))
        elif tokens[0] == 'load' and tokens[2] == 'from' and tokens[3] == 'file':
            # load <name> from file <file>
            filename = tokens[4]
            file_id = notebook.upload_file(filename=filename)
            return vizual.load_dataset(dataset_name=tokens[1],
                                       file={
                                           FILE_ID: file_id,
                                           FILE_NAME:
                                           os.path.basename(filename)
                                       })
        elif tokens[0] == 'load' and tokens[2] == 'from' and tokens[3] == 'url':
            # load <name> from url <url>
            return vizual.load_dataset(dataset_name=tokens[1],
                                       file={FILE_URL: tokens[4]})
        elif tokens[0:2] == ['rename', 'dataset'] and tokens[3] == 'to':
            # rename dataset <dataset> to <new-name>
            dataset_name = tokens[2].lower()
            # Get the referenced dataset and column from the current notebook
            # state
            if dataset_name not in datasets:
                raise ValueError('unknown dataset \'' + dataset_name + '\'')
            return vizual.rename_dataset(dataset_name=dataset_name,
                                         new_name=tokens[4])
        elif tokens[0] == 'update':
            # update <dataset> <column-name> <row-index> <value>
            dataset_name = tokens[1].lower()
            # Get the referenced dataset and column from the current notebook
            # state
            if dataset_name not in datasets:
                raise ValueError('unknown dataset \'' + dataset_name + '\'')
            ds = notebook.get_dataset(datasets[dataset_name])
            col = ds.get_column(tokens[2])
            return vizual.update_cell(dataset_name=dataset_name,
                                      column=col.identifier,
                                      row=int(tokens[3]),
                                      value=cast(tokens[4]))
    elif len(tokens) >= 6 and tokens[0] == 'chart':
        if tokens[2] == 'on' and tokens[4] == 'with':
            # chart <chart-name> on <dataset-name> with <column-name:label:start-end> ...
            dataset_name = tokens[3].lower()
            if dataset_name not in datasets:
                raise ValueError('unknown dataset \'' + dataset_name + '\'')
            ds = notebook.get_dataset(datasets[dataset_name])
            series = list()
            for spec in tokens[5:]:
                s_tokens = spec.split(':')
                if len(s_tokens) != 3:
                    print('invalid data series ' + str(s_tokens))
                    return None
                s = {
                    'column': ds.get_column(s_tokens[0]).identifier,
                    'range': s_tokens[2].replace('-', ':')
                }
                if s_tokens[1] != '':
                    s['label'] = s_tokens[1]
                series.append(s)
            return plot.create_plot(chart_name=tokens[1],
                                    dataset_name=dataset_name,
                                    series=series)
    elif len(tokens) == 7:
        if (tokens[0:3] == ['insert', 'row', 'into']
                and tokens[4:6] == ['at', 'position']):
            # insert row into <dataset> at position <row-index>
            dataset_name = tokens[3].lower()
            if dataset_name not in datasets:
                raise ValueError('unknown dataset \'' + dataset_name + '\'')
            return vizual.insert_row(dataset_name=dataset_name,
                                     position=int(tokens[6]))
        elif (tokens[0:2] == ['rename', 'column'] and tokens[3] == 'in'
                and tokens[5] == 'to'):
            # rename column <name> in <dataset> to <new-name>
            dataset_name = tokens[4].lower()
            if dataset_name not in datasets:
                raise ValueError('unknown dataset \'' + dataset_name + '\'')
            ds = notebook.get_dataset(datasets[dataset_name])
            col = ds.get_column(tokens[2])
            return vizual.rename_column(dataset_name=dataset_name,
                                        column=col.identifier,
                                        name=tokens[6])
    elif len(tokens) == 8:
        if (tokens[0:2] == ['insert', 'column'] and tokens[3] == 'into'
                and tokens[5:7] == ['at', 'position']):
            # insert column <name> into <dataset> at position <column-index>
            dataset_name = tokens[4].lower()
            if dataset_name not in datasets:
                raise ValueError('unknown dataset \'' + dataset_name + '\'')
            return vizual.insert_column(dataset_name=dataset_name,
                                        position=int(tokens[7]),
                                        name=tokens[2])
        elif (tokens[0:2] == ['move', 'column'] and tokens[3] == 'in'
                and tokens[5:7] == ['to', 'position']):
            # move column <name> in <dataset> to position <column-index>
            dataset_name = tokens[4].lower()
            if dataset_name not in datasets:
                raise ValueError('unknown dataset \'' + dataset_name + '\'')
            ds = notebook.get_dataset(datasets[dataset_name])
            col = ds.get_column(tokens[2])
            return vizual.move_column(dataset_name=dataset_name,
                                      column=col.identifier,
                                      position=int(tokens[7]))
        elif (tokens[0:2] == ['move', 'row'] and tokens[3] == 'in'
                and tokens[5:7] == ['to', 'position']):
            # move row <row-index> in <dataset> to position <target-index>
            dataset_name = tokens[4].lower()
            if dataset_name not in datasets:
                raise ValueError('unknown dataset \'' + dataset_name + '\'')
            return vizual.move_row(dataset_name=dataset_name,
                                   row=int(tokens[2]),
                                   position=int(tokens[7]))
    return None
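A hypothetical driver loop for parse_command; the notebook object and the
datasets mapping would come from the enclosing CLI session and are
assumptions here, not part of the source:

# Hypothetical REPL around parse_command. `notebook` and `datasets` are
# assumed to be provided by the surrounding CLI session.
while True:
    line = input('vizier> ').strip()
    if line in ('exit', 'quit'):
        break
    command = parse_command(line.split(), notebook, datasets=datasets)
    if command is None:
        print('invalid command: ' + line)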