Example #1
    def mutate_and_get_payload(cls,
                               root,
                               info,
                               owner,
                               dataset_name,
                               transaction_id,
                               cancel=False,
                               rollback=False,
                               client_mutation_id=None):
        username = get_logged_in_username()
        ds = InventoryManager().load_dataset(username,
                                             owner,
                                             dataset_name,
                                             author=get_logged_in_author())
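        # Hold the dataset lock while finalizing so concurrent uploads cannot
        # modify the manifest mid-sweep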
        with ds.lock():
            if cancel and rollback:
                logger.warning(
                    f"Cancelled tx {transaction_id}, doing git reset")
                # TODO: Add ability to reset
            else:
                logger.info(
                    f"Done batch upload {transaction_id}, cancelled={cancel}")
                if cancel:
                    logger.warning("Sweeping aborted batch upload.")

                m = "Cancelled upload `{transaction_id}`. " if cancel else ''

                # Sweep up and process all files added during upload
                manifest = Manifest(ds, username)
                manifest.sweep_all_changes(upload=True, extra_msg=m)

        return CompleteDatasetUploadTransaction(success=True)

    def test_delete_dataset_files_errors(self, fixture_working_dir, snapshot):
        im = InventoryManager(fixture_working_dir[0])
        ds = im.create_dataset('default',
                               'default',
                               "dataset-delete-2",
                               storage_type="gigantum_object_v1",
                               description="testing delete")
        m = Manifest(ds, 'default')

        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test1.txt", "asdfadfsdf")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test2.txt", "fdsfgfd")
        m.sweep_all_changes()

        revision = m.dataset_revision
        assert os.path.exists(
            os.path.join(m.cache_mgr.cache_root, revision,
                         "test1.txt")) is True
        assert os.path.exists(
            os.path.join(m.cache_mgr.cache_root, revision,
                         "test2.txt")) is True

        query = """
                   mutation myMutation {
                     deleteDatasetFiles(input: {datasetOwner: "default", datasetName: "dataset-delete-2", 
                                                keys: ["testdfdfdfdf.txt"]}) {
                         success
                     }
                   }
                   """
        result = fixture_working_dir[2].execute(query)
        assert 'errors' in result

    def test_move_dataset_file(self, fixture_working_dir, snapshot):
        im = InventoryManager(fixture_working_dir[0])
        ds = im.create_dataset('default',
                               'default',
                               "dataset-move",
                               storage_type="gigantum_object_v1",
                               description="testing move")
        m = Manifest(ds, 'default')

        revision = m.dataset_revision
        helper_append_file(m.cache_mgr.cache_root, revision, "test1.txt",
                           "asdfasdghndfdf")
        m.sweep_all_changes()

        revision = m.dataset_revision
        cr = m.cache_mgr.cache_root
        assert os.path.exists(os.path.join(cr, revision, "test1.txt")) is True

        query = """
                   mutation myMutation {
                     moveDatasetFile(input: {datasetOwner: "default", datasetName: "dataset-move", 
                                             srcPath: "test1.txt", dstPath: "test1-renamed.txt"}) {
                         updatedEdges {
                            node {
                              id
                              key
                              isDir
                              isLocal
                              size
                            }
                         }
                     }
                   }
                   """
        result = fixture_working_dir[2].execute(query)
        assert 'errors' not in result
        snapshot.assert_match(result)

        revision = m.dataset_revision
        cr = m.cache_mgr.cache_root
        assert os.path.exists(os.path.join(cr, revision, "test1.txt")) is False
        assert os.path.exists(os.path.join(cr, revision,
                                           "test1-renamed.txt")) is True
Example #4
    def test_sync__dataset(self, mock_config_file):
        def update_feedback(msg: str,
                            has_failures: Optional[bool] = None,
                            failure_detail: Optional[str] = None,
                            percent_complete: Optional[float] = None):
            """Method to update the job's metadata and provide feedback to the UI"""
            assert has_failures is None or has_failures is False
            assert failure_detail is None

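        # Pretend the dispatched background job already finished successfully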
        def dispatch_query_mock(self, job_key):
            JobStatus = namedtuple("JobStatus", ['status', 'meta'])
            return JobStatus(status='finished',
                             meta={'completed_bytes': '100'})

        def dispatch_mock(self, method_reference, kwargs, metadata, persist):
            return "afakejobkey"

        username = '******'
        im = InventoryManager(mock_config_file[0])
        ds = im.create_dataset(username, username, 'dataset-1',
                               'gigantum_object_v1')
        m = Manifest(ds, username)
        wf = DatasetWorkflow(ds)

        iom = IOManager(ds, m)
        assert len(glob.glob(f'{iom.push_dir}/*')) == 0
        wf.publish(username=username, feedback_callback=update_feedback)

        # Put a file into the dataset that needs to be pushed
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test1.txt", "asdfadfsdf")
        m.sweep_all_changes()

        assert len(glob.glob(f'{iom.push_dir}/*')) == 1
        with patch.object(Dispatcher, 'dispatch_task', dispatch_mock):
            with patch.object(Dispatcher, 'query_task', dispatch_query_mock):
                wf.sync(username=username, feedback_callback=update_feedback)
                assert os.path.exists(wf.remote)
                assert len(glob.glob(f'{iom.push_dir}/*')) == 0

    def test_update_dataset_link(self, fixture_working_dir, snapshot):
        im = InventoryManager(fixture_working_dir[0])
        lb = im.create_labbook('default', 'default', 'test-lb',
                               'testing dataset links')
        ds = im.create_dataset('default',
                               'default',
                               "dataset100",
                               storage_type="gigantum_object_v1",
                               description="100")
        manifest = Manifest(ds, 'default')
        helper_append_file(manifest.cache_mgr.cache_root,
                           manifest.dataset_revision, "test1.txt", "12345")
        manifest.sweep_all_changes()

        # Fake publish to a local bare repo
        _MOCK_create_remote_repo2(ds, 'default', None, None)

        assert os.path.exists(os.path.join(lb.root_dir,
                                           '.gitmodules')) is False

        overview_query = """
                {
                  labbook(owner: "default", name:"test-lb")
                  {
                    linkedDatasets{
                      name
                      overview {
                          localBytes
                          totalBytes
                      }
                    }
                  }
                }
                """

        query = """
                   mutation myMutation($lo: String!, $ln: String!, $do: String!, $dn: String!,
                                       $a: String!, $du: String) {
                     modifyDatasetLink(input: {labbookOwner: $lo, labbookName: $ln, datasetOwner: $do, datasetName: $dn,
                                               action: $a, datasetUrl: $du}) {
                         newLabbookEdge {
                           node {
                             id
                             name
                             description
                             linkedDatasets {
                               name
                             }
                           }
                         }
                     }
                   }
                   """
        variables = {
            "lo": "default",
            "ln": "test-lb",
            "do": "default",
            "dn": "dataset100",
            "a": "link",
            "du": ds.remote
        }
        result = fixture_working_dir[2].execute(query,
                                                variable_values=variables)
        assert "errors" not in result
        snapshot.assert_match(result)

        assert os.path.exists(os.path.join(lb.root_dir, '.gitmodules')) is True
        dataset_submodule_dir = os.path.join(lb.root_dir, '.gigantum',
                                             'datasets', 'default',
                                             'dataset100')
        assert os.path.exists(dataset_submodule_dir) is True
        assert os.path.exists(os.path.join(dataset_submodule_dir,
                                           '.gigantum')) is True
        assert os.path.exists(
            os.path.join(dataset_submodule_dir, 'test_file.dat')) is False

        with open(os.path.join(lb.root_dir, '.gitmodules'), 'rt') as mf:
            data = mf.read()
        assert len(data) > 0

        # check overview
        result = fixture_working_dir[2].execute(overview_query)
        assert "errors" not in result
        overview = result['data']['labbook']['linkedDatasets'][0]['overview']
        assert overview['localBytes'] == '5'
        assert overview['totalBytes'] == '5'

        # Make change to published dataset
        git_dir = os.path.join(tempfile.gettempdir(),
                               'test_update_dataset_link_mutation')
        try:
            os.makedirs(git_dir)
            call_subprocess(['git', 'clone', ds.remote],
                            cwd=git_dir,
                            check=True)
            with open(os.path.join(git_dir, ds.name, 'test_file.dat'),
                      'wt') as tf:
                tf.write("Test File Contents")
            call_subprocess(['git', 'add', 'test_file.dat'],
                            cwd=os.path.join(git_dir, ds.name),
                            check=True)
            call_subprocess(['git', 'commit', '-m', 'editing repo'],
                            cwd=os.path.join(git_dir, ds.name),
                            check=True)
            call_subprocess(['git', 'push'],
                            cwd=os.path.join(git_dir, ds.name),
                            check=True)

            query = """
                       mutation myMutation($lo: String!, $ln: String!, $do: String!, $dn: String!,
                                           $a: String!) {
                         modifyDatasetLink(input: {labbookOwner: $lo, labbookName: $ln, datasetOwner: $do, datasetName: $dn,
                                                   action: $a}) {
                             newLabbookEdge {
                               node {
                                 id
                                 name
                                 description
                                 linkedDatasets {
                                   name
                                 }
                               }
                             }
                         }
                       }
                       """
            variables = {
                "lo": "default",
                "ln": "test-lb",
                "do": "default",
                "dn": "dataset100",
                "a": "update"
            }
            result = fixture_working_dir[2].execute(query,
                                                    variable_values=variables)
            assert "errors" not in result
            snapshot.assert_match(result)

            # verify change is reflected
            assert os.path.exists(
                os.path.join(dataset_submodule_dir, 'test_file.dat')) is True

            # Verify activity record
            assert "Updated Dataset `default/dataset100` link to version" in lb.git.log(
            )[0]['message']

        finally:
            if os.path.exists(git_dir):
                shutil.rmtree(git_dir)
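
The _MOCK_create_remote_repo2 fixture used above ("Fake publish to a local
bare repo") is also not shown. What the test depends on is a clonable,
pushable remote reachable via ds.remote. A rough sketch under those
assumptions, using only plain git commands (the real fixture and the exact
Dataset attributes involved may differ):

    import os
    import tempfile
    import uuid

    def _MOCK_create_remote_repo2(ds, username, access_token, id_token):
        """Stand up a local bare repo and register it as the dataset remote."""
        remote_dir = os.path.join(tempfile.gettempdir(), uuid.uuid4().hex)
        call_subprocess(['git', 'init', '--bare', remote_dir], cwd='/tmp', check=True)
        call_subprocess(['git', 'remote', 'add', 'origin', remote_dir],
                        cwd=ds.root_dir, check=True)  # assumes ds.root_dir exists
        call_subprocess(['git', 'push', '--set-upstream', 'origin', 'master'],
                        cwd=ds.root_dir, check=True)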
Example #6
    def test_download_dataset_files_file_fail(
            self, mock_config_file_background_tests):
        def dispatch_query_mock(self, job_key):
            # mock the job actually running and returning status
            JobStatus = namedtuple("JobStatus", ['status', 'meta'])
            return JobStatus(status='finished',
                             meta={
                                 'completed_bytes': '0',
                                 'failure_keys': 'test1.txt'
                             })

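        # Run the pull synchronously in-process instead of dispatching a real job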
        def dispatch_mock(self, method_reference, kwargs, metadata, persist):
            gtmcore.dispatcher.dataset_jobs.pull_objects(**kwargs)
            return "afakejobkey"

        im = InventoryManager(mock_config_file_background_tests[0])
        ds = im.create_dataset('default',
                               'default',
                               "dataset100",
                               storage_type="gigantum_object_v1",
                               description="100")
        m = Manifest(ds, 'default')
        iom = IOManager(ds, m)

        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test1.txt", "asdfadfsdf")
        m.sweep_all_changes()

        obj_to_push = iom.objects_to_push()
        assert len(obj_to_push) == 1
        _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
        obj1_target = obj_to_push[0].object_path

        obj1_source = os.path.join('/tmp', uuid.uuid4().hex)

        assert os.path.exists(obj1_target) is True
        helper_compress_file(obj1_target, obj1_source)
        assert os.path.isfile(obj1_target) is False
        assert os.path.isfile(obj1_source) is True

        # Clear out from linked dir
        os.remove(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'test1.txt'))

        with patch.object(Configuration, 'find_default_config',
                          lambda self: mock_config_file_background_tests[0]):
            with patch.object(Dispatcher, 'dispatch_task', dispatch_mock):
                with patch.object(Dispatcher, 'query_task',
                                  dispatch_query_mock):
                    dl_kwargs = {
                        'logged_in_username': "******",
                        'access_token': "asdf",
                        'id_token': "1234",
                        'dataset_owner': "default",
                        'dataset_name': "dataset100",
                        'labbook_owner': None,
                        'labbook_name': None,
                        'keys': ["test1.txt"],
                        'config_file': mock_config_file_background_tests[0]
                    }

                    with pytest.raises(IOError):
                        gtmcore.dispatcher.dataset_jobs.download_dataset_files(
                            **dl_kwargs)
                    assert os.path.isfile(obj1_target) is False
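
helper_compress_file is another undefined test utility. Its observable
contract above: the first path stops existing as a file and the second
appears, and later tests undo it with snappy.StreamDecompressor, so the
object store format is evidently the snappy stream format. A hedged sketch
(a reconstruction, not the real helper):

    import os
    import snappy

    def helper_compress_file(source_path, compressed_path):
        """Snappy stream-compress source_path into compressed_path, then
        remove the original file (matching the asserts in the tests)."""
        with open(source_path, 'rb') as src, open(compressed_path, 'wb') as dst:
            snappy.stream_compress(src, dst)
        os.remove(source_path)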
Example #7
    def test_download_dataset_files(self, mock_config_file_background_tests,
                                    mock_dataset_head):
        def dispatch_query_mock(self, job_key):
            JobStatus = namedtuple("JobStatus", ['status', 'meta'])
            return JobStatus(status='finished',
                             meta={'completed_bytes': '500'})

        def dispatch_mock(self, method_reference, kwargs, metadata, persist):
            with aioresponses() as mocked_responses:
                mocked_responses.get(
                    f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_1}',
                    payload={
                        "presigned_url":
                        f"https://dummyurl.com/{obj_id_1}?params=1",
                        "namespace": ds.namespace,
                        "obj_id": obj_id_1,
                        "dataset": ds.name
                    },
                    status=200)

                with open(obj1_source, 'rb') as data1:
                    mocked_responses.get(
                        f"https://dummyurl.com/{obj_id_1}?params=1",
                        body=data1.read(),
                        status=200,
                        content_type='application/octet-stream')
                gtmcore.dispatcher.dataset_jobs.pull_objects(**kwargs)

                return "afakejobkey"

        im = InventoryManager(mock_config_file_background_tests[0])
        ds = im.create_dataset('default',
                               'default',
                               "dataset100",
                               storage_type="gigantum_object_v1",
                               description="100")
        m = Manifest(ds, 'default')
        iom = IOManager(ds, m)

        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test1.txt", "asdfadfsdf")
        m.sweep_all_changes()

        obj_to_push = iom.objects_to_push()
        assert len(obj_to_push) == 1
        _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
        obj1_target = obj_to_push[0].object_path

        obj1_source = os.path.join('/tmp', uuid.uuid4().hex)

        assert os.path.exists(obj1_target) is True
        helper_compress_file(obj1_target, obj1_source)
        assert os.path.isfile(obj1_target) is False
        assert os.path.isfile(obj1_source) is True

        # Clear out from linked dir
        os.remove(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'test1.txt'))

        with patch.object(Configuration, 'find_default_config',
                          lambda self: mock_config_file_background_tests[0]):
            with patch.object(Dispatcher, 'dispatch_task', dispatch_mock):
                with patch.object(Dispatcher, 'query_task',
                                  dispatch_query_mock):
                    dl_kwargs = {
                        'logged_in_username': "******",
                        'access_token': "asdf",
                        'id_token': "1234",
                        'dataset_owner': "default",
                        'dataset_name': "dataset100",
                        'labbook_owner': None,
                        'labbook_name': None,
                        'keys': ["test1.txt"],
                        'config_file': mock_config_file_background_tests[0]
                    }

                    gtmcore.dispatcher.dataset_jobs.download_dataset_files(
                        **dl_kwargs)
                    assert os.path.isfile(obj1_target) is True

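                    # The downloaded object should round-trip: decompress the
                    # snappy copy we kept and compare it to the restored file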
                    decompressor = snappy.StreamDecompressor()
                    with open(obj1_source, 'rb') as dd:
                        source1 = decompressor.decompress(dd.read())
                        source1 += decompressor.flush()
                    with open(obj1_target, 'rt') as dd:
                        dest1 = dd.read()
                    assert source1.decode("utf-8") == dest1
Example #8
    def test_pull_objects(self, mock_config_file, mock_dataset_head):
        im = InventoryManager(mock_config_file[0])
        ds = im.create_dataset('default',
                               'default',
                               "dataset100",
                               storage_type="gigantum_object_v1",
                               description="100")
        m = Manifest(ds, 'default')
        iom = IOManager(ds, m)

        os.makedirs(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         "other_dir"))
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test1.txt", "asdfadfsdf")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test2.txt", "fdsfgfd")
        m.sweep_all_changes()

        obj_to_push = iom.objects_to_push()
        assert len(obj_to_push) == 2
        _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
        _, obj_id_2 = obj_to_push[1].object_path.rsplit('/', 1)
        obj1_target = obj_to_push[0].object_path
        obj2_target = obj_to_push[1].object_path

        obj1_source = os.path.join('/tmp', uuid.uuid4().hex)
        obj2_source = os.path.join('/tmp', uuid.uuid4().hex)

        assert os.path.exists(obj1_target) is True
        assert os.path.exists(obj2_target) is True
        helper_compress_file(obj1_target, obj1_source)
        helper_compress_file(obj2_target, obj2_source)
        assert os.path.isfile(obj1_target) is False
        assert os.path.isfile(obj2_target) is False
        assert os.path.isfile(obj1_source) is True
        assert os.path.isfile(obj2_source) is True

        # Clear out from linked dir
        os.remove(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'test1.txt'))
        os.remove(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'test2.txt'))

        with patch.object(Configuration, 'find_default_config',
                          lambda self: mock_config_file[0]):
            with aioresponses() as mocked_responses:
                mocked_responses.get(
                    f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_1}',
                    payload={
                        "presigned_url":
                        f"https://dummyurl.com/{obj_id_1}?params=1",
                        "namespace": ds.namespace,
                        "obj_id": obj_id_1,
                        "dataset": ds.name
                    },
                    status=200)

                with open(obj1_source, 'rb') as data1:
                    mocked_responses.get(
                        f"https://dummyurl.com/{obj_id_1}?params=1",
                        body=data1.read(),
                        status=200,
                        content_type='application/octet-stream')

                mocked_responses.get(
                    f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_2}',
                    payload={
                        "presigned_url":
                        f"https://dummyurl.com/{obj_id_2}?params=1",
                        "namespace": ds.namespace,
                        "obj_id": obj_id_2,
                        "dataset": ds.name
                    },
                    status=200)

                with open(obj2_source, 'rb') as data2:
                    mocked_responses.get(
                        f"https://dummyurl.com/{obj_id_2}?params=1",
                        body=data2.read(),
                        status=200,
                        content_type='application/octet-stream')

                dl_kwargs = {
                    'logged_in_username': "******",
                    'access_token': "asdf",
                    'id_token': "1234",
                    'dataset_owner': "default",
                    'dataset_name': "dataset100",
                    'labbook_owner': None,
                    'labbook_name': None,
                    'keys': ["test1.txt"]
                }

                gtmcore.dispatcher.dataset_jobs.pull_objects(**dl_kwargs)

                # Manually link since this is disabled by default in the job (because in real use,
                # multiple jobs run in parallel and you only want to link once)
                m.link_revision()

                assert os.path.isfile(obj1_target) is True
                assert os.path.isfile(obj2_target) is False

                decompressor = snappy.StreamDecompressor()
                with open(obj1_source, 'rb') as dd:
                    source1 = decompressor.decompress(dd.read())
                    source1 += decompressor.flush()
                with open(obj1_target, 'rt') as dd:
                    dest1 = dd.read()
                assert source1.decode("utf-8") == dest1

                # Download other file
                dl_kwargs = {
                    'logged_in_username': "******",
                    'access_token': "asdf",
                    'id_token': "1234",
                    'dataset_owner': "default",
                    'dataset_name': "dataset100",
                    'labbook_owner': None,
                    'labbook_name': None,
                    'keys': ["test2.txt"]
                }

                gtmcore.dispatcher.dataset_jobs.pull_objects(**dl_kwargs)

                # Manually link since this is disabled by default in the job (because in real use,
                # multiple jobs run in parallel and you only want to link once)
                m.link_revision()

                assert os.path.isfile(obj1_target) is True
                assert os.path.isfile(obj2_target) is True

                with open(obj1_source, 'rb') as dd:
                    source1 = decompressor.decompress(dd.read())
                    source1 += decompressor.flush()
                with open(obj1_target, 'rt') as dd:
                    dest1 = dd.read()
                assert source1.decode("utf-8") == dest1

                with open(obj2_source, 'rb') as dd:
                    source2 = decompressor.decompress(dd.read())
                    source2 += decompressor.flush()
                with open(obj2_target, 'rt') as dd:
                    dest2 = dd.read()
                assert source2.decode("utf-8") == dest2
Example #9
    def test_push_objects(self, mock_config_file, mock_dataset_head):
        im = InventoryManager(mock_config_file[0])
        ds = im.create_dataset('default',
                               'default',
                               "dataset100",
                               storage_type="gigantum_object_v1",
                               description="100")
        manifest = Manifest(ds, 'default')
        iom = IOManager(ds, manifest)

        revision = manifest.dataset_revision
        os.makedirs(
            os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test1.txt", "test content 1")
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test2.txt", "test content 2")
        manifest.sweep_all_changes()

        obj_to_push = iom.objects_to_push()
        assert len(obj_to_push) == 2
        _, obj1 = obj_to_push[0].object_path.rsplit('/', 1)
        _, obj2 = obj_to_push[1].object_path.rsplit('/', 1)

        with aioresponses() as mocked_responses:
            mocked_responses.put(
                f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj1}',
                payload={
                    "presigned_url": f"https://dummyurl.com/{obj1}?params=1",
                    "namespace": ds.namespace,
                    "key_id": "hghghg",
                    "obj_id": obj1,
                    "dataset": ds.name
                },
                status=200)
            mocked_responses.put(f"https://dummyurl.com/{obj1}?params=1",
                                 payload={},
                                 status=200)

            mocked_responses.put(
                f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj2}',
                payload={
                    "presigned_url": f"https://dummyurl.com/{obj2}?params=1",
                    "namespace": ds.namespace,
                    "key_id": "hghghg",
                    "obj_id": obj2,
                    "dataset": ds.name
                },
                status=200)
            mocked_responses.put(f"https://dummyurl.com/{obj2}?params=1",
                                 payload={},
                                 status=200)

            job_kwargs = {
                'objs': obj_to_push,
                'logged_in_username': "******",
                'access_token': "faketoken",
                'id_token': "faketoken",
                'dataset_owner': ds.namespace,
                'dataset_name': ds.name,
                'config_file': ds.client_config.config_file,
            }
            gtmcore.dispatcher.dataset_jobs.push_dataset_objects(**job_kwargs)
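
The mocked endpoints in this last example reflect a two-step upload per
object: a PUT to the object service that returns a presigned URL, then a PUT
of the object bytes to that URL. A minimal synchronous sketch of that flow
(assumed shape only; the auth header is a guess, and the real
push_dataset_objects runs async inside gtmcore):

    import requests

    def push_object_sketch(namespace, dataset_name, obj_id, data, token):
        # Step 1: ask the object service for a presigned upload URL
        resp = requests.put(
            f"https://api.gigantum.com/object-v1/{namespace}/{dataset_name}/{obj_id}",
            headers={"Authorization": f"Bearer {token}"})  # header shape assumed
        resp.raise_for_status()
        presigned_url = resp.json()["presigned_url"]
        # Step 2: upload the object bytes directly to the presigned URL
        requests.put(presigned_url, data=data).raise_for_status()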