Пример #1
0
    def setUp(self):
        """Prepare a test repository with two cache files symlinked into data.

        Creates ``cache/dir1/file1`` and ``cache/file2`` and symlinks them as
        ``data/dir1/file1`` and ``data/file2``. All of these paths are
        relative to ``self._test_git_dir``, which becomes the working
        directory.
        """
        self._test_dir = os.path.join(os.path.sep, 'tmp', 'dvc_unit_test')
        curr_dir = None
        commit = 'abc1234'

        # NOTE(review): presumably this builds the repo layout (including the
        # top-level data/ and cache/ dirs) and sets self._test_git_dir, which
        # is read below -- confirm against BasicEnvironment.
        BasicEnvironment.init_environment(self, self._test_dir, curr_dir)

        self._commit = commit

        self._git = GitWrapperI(git_dir=self._test_git_dir,
                                commit=self._commit)
        self._config = ConfigI('data', 'cache', 'state', '.target')
        self.path_factory = PathFactory(self._git, self._config)
        self.settings = Settings(['target'], self._git, self._config)

        os.chdir(self._test_git_dir)

        os.mkdir(os.path.join('data', 'dir1'))
        os.mkdir(os.path.join('cache', 'dir1'))

        self.file1_cache = os.path.join('cache', 'dir1', 'file1')
        self.file1_data = os.path.join('data', 'dir1', 'file1')

        # Close the handle explicitly so the content is flushed to disk
        # before the symlink is created, regardless of the interpreter's
        # garbage-collection strategy.
        with open(self.file1_cache, 'w') as fd:
            fd.write('ddfdff')
        System.symlink(self.file1_cache, self.file1_data)

        self.file2_cache = os.path.join('cache', 'file2')
        self.file2_data = os.path.join('data', 'file2')

        with open(self.file2_cache, 'w') as fd:
            fd.write('ddfdff')
        System.symlink(self.file2_cache, self.file2_data)
Пример #2
0
    def storage_path_hierarchy_test(self):
        """ StoragePath should be read first from global and then from cloud section """

        def cloud_from_lines(lines):
            # Feed the joined lines to Config as an in-memory pseudo file and
            # wrap the result in a DataCloud.
            stream = StringIO('\n'.join(lines))
            parsed = Config(stream, conf_pseudo_file=stream)
            return DataCloud(Settings(None, None, parsed))

        global_head = ["[Global]", "LogLevel =", "DataDir = ", "CacheDir = ",
                       "StateDir = "]
        cloud_sections = ["[AWS]",
                          "StoragePath = awssb/aws_storage_path",
                          "CredentialPath =",
                          "[GCP]",
                          "StoragePath = googlesb/google_storage_path"]

        # Case 1: [Global] carries its own StoragePath, which is the one used.
        with_global_path = (
            global_head
            + ["Cloud = aws", "StoragePath = globalsb/global_storage_path"]
            + cloud_sections
        )
        cloud = cloud_from_lines(with_global_path)
        self.assertEqual(cloud.typ, 'AWS')
        self.assertEqual(cloud._cloud.storage_bucket, 'globalsb')
        self.assertEqual(cloud._cloud.storage_prefix, 'global_storage_path')

        # Case 2: no StoragePath in [Global], so the [AWS] value is used.
        without_global_path = global_head + ["Cloud = Aws"] + cloud_sections
        cloud = cloud_from_lines(without_global_path)
        self.assertEqual(cloud.typ, 'AWS')
        self.assertEqual(cloud._cloud.storage_bucket, 'awssb')
        self.assertEqual(cloud._cloud.storage_prefix, 'aws_storage_path')
Пример #3
0
    def run(cmd_class, parse_config=True, args_start_loc=2):
        """Build runtime settings, run ``cmd_class`` and exit the process.

        The exit status is whatever the command instance's ``run()`` returns.
        A DvcException raised along the way is logged and turned into exit
        status 1.

        Arguments:
            cmd_class: callable that receives a Settings object and returns
                an object with a ``run()`` method
            parse_config (bool): when true the configuration is loaded from
                the path given by Runtime.conf_file_path; otherwise a
                default-constructed ConfigI is used
            args_start_loc (int): where the arguments this command should use start
        """

        try:
            git = GitWrapper()

            if not parse_config:
                config = ConfigI()
            else:
                conf_path = Runtime.conf_file_path(git.git_dir)
                config = Config(conf_path)

            # Slice before sys.argv[0] is rewritten below.
            cmd_args = sys.argv[args_start_loc:]

            # To make argparse print "usage: dvc cmd" instead of "usage: dvc"
            sys.argv[0] = '{} {}'.format(sys.argv[0], sys.argv[1])

            command = cmd_class(Settings(cmd_args, git, config))
            status = command.run()
            sys.exit(status)
        except DvcException as exc:
            Logger.error(exc)
            sys.exit(1)
Пример #4
0
    def aws_credentials_specified_test(self, isfile_function):
        """ in presence of [AWS] -> CredentialPath, use those credentials """

        # NOTE(review): isfile_function is unused here; presumably it is
        # injected by a mock.patch decorator outside this view -- confirm.
        c = (
            "[Global]",
            "LogLevel =",
            "DataDir = ",
            "CacheDir = ",
            "StateDir = ",
            "Cloud = aws",
            "[AWS]",
            "StoragePath = awssb/aws_storage_path",
            "CredentialPath = some_credential_path",
        )
        s = StringIO('\n'.join(c))

        # Use the patch as a context manager so the builtin open() is always
        # restored, even if building the cloud or reading credentials raises.
        # A leaked patch would otherwise break every test that runs later.
        with mock.patch(
                builtin_module_name + '.open',
                side_effect=self.mocked_open_aws_default_credentials):
            conf = Config(s, conf_pseudo_file=s)
            cloud = DataCloud(Settings(None, None, conf))
            aws_creds = cloud._cloud.get_aws_credentials()

        self.assertEqual(aws_creds[0], 'override_access_id')
        self.assertEqual(aws_creds[1], 'override_sekret')
Пример #5
0
    def aws_credentials_default_test(self):
        """ in absence of [AWS] -> CredentialPath, aws creds should be read from ~/.aws/credentials """

        c = (
            "[Global]",
            "LogLevel =",
            "DataDir = ",
            "CacheDir = ",
            "StateDir = ",
            "Cloud = aws",
            "[AWS]",
            "StoragePath = awssb/aws_storage_path",
            "CredentialPath =",
        )
        s = StringIO('\n'.join(c))

        # Use the patch as a context manager so the builtin open() is always
        # restored, even if building the cloud or reading credentials raises.
        # A leaked patch would otherwise break every test that runs later.
        with mock.patch(
                builtin_module_name + '.open',
                side_effect=self.mocked_open_aws_default_credentials):
            conf = Config(s, conf_pseudo_file=s)
            cloud = DataCloud(Settings(None, None, conf))
            aws_creds = cloud._cloud.get_aws_credentials()

        self.assertEqual(aws_creds[0], 'default_access_id')
        self.assertEqual(aws_creds[1], 'default_sekret')
Пример #6
0
    def setUp(self):
        """Set up the environment, the settings for 'gc' and a dir1 subdir."""
        test_root = os.path.join(os.path.sep, 'tmp', 'dvc_unit_test')
        self._test_dir = test_root

        # NOTE(review): self._test_git_dir is read below but not assigned
        # here; presumably init_environment sets it -- confirm.
        BasicEnvironment.init_environment(self, test_root, None)

        self._commit = 'abc1234'

        git = GitWrapperI(git_dir=self._test_git_dir, commit=self._commit)
        self._git = git
        config = ConfigI('data', 'cache', 'state')
        self._config = config
        self.path_factory = PathFactory(git, config)
        self.settings = Settings('gc', git, config)

        self.dir1 = 'dir1'
        self.create_dirs(self.dir1)
Пример #7
0
    def init_environment(self,
                         test_dir=os.path.join(os.path.sep, 'tmp',
                                               'ntx_unit_test'),
                         curr_dir=None,
                         commit='abc12345'):
        ''' Populates the test environment with directories and data items.

        Layout the fixture is meant to produce:
        data/
            file1.txt     --> ../cache/file1.txt_abc123
            dir1/
                file2.txt      --> ../../cache/dir1/file2.txt_abc123
                file3.txt      --> ../../cache/dir1/file3.txt_abc123
                dir11/
                    file4.txt  --> ../../../cache/dir1/dir11/file4.txt_abc123
            dir2/
                file5.txt      --> ../../cache/dir2/file5.txt_abc123
                file6.txt      --> an actual file

        Each item is created through self.crate_data_item and its returned
        triple is stored as self.fileN, self.cacheN, self.stateN. file6 is
        the only item created with cache_file=False.
        '''

        BasicEnvironment.init_environment(self, test_dir, curr_dir)

        self._commit = commit

        git = GitWrapperI(git_dir=self._test_git_dir, commit=self._commit)
        self._git = git
        config = ConfigI('data')
        self._config = config
        self.path_factory = PathFactory(git, config)
        self.settings = Settings([], git, config)

        self.dir1 = 'dir1'
        self.dir11 = os.path.join('dir1', 'dir11')
        self.dir2 = 'dir2'

        # Parents first: dir11 lives inside dir1.
        for subdir in (self.dir1, self.dir11, self.dir2):
            self.create_dirs(subdir)

        path2 = os.path.join(self.dir1, 'file2.txt')
        path3 = os.path.join(self.dir1, 'file3.txt')
        path4 = os.path.join(self.dir11, 'file4.txt')
        path5 = os.path.join(self.dir2, 'file5.txt')
        path6 = os.path.join(self.dir2, 'file6.txt')

        self.file1, self.cache1, self.state1 = self.crate_data_item('file1.txt')
        self.file2, self.cache2, self.state2 = self.crate_data_item(path2)
        self.file3, self.cache3, self.state3 = self.crate_data_item(path3)
        self.file4, self.cache4, self.state4 = self.crate_data_item(path4)
        self.file5, self.cache5, self.state5 = self.crate_data_item(path5)
        self.file6, self.cache6, self.state6 = self.crate_data_item(
            path6, cache_file=False)
Пример #8
0
def main():
    """Parse the command line, run the selected command, return its status.

    Returns 255 when building the settings or the command instance fails,
    254 when the command's run() raises, and run()'s return value otherwise.
    """
    cli_args = sys.argv[1:]
    try:
        settings = Settings(cli_args)
        instance = settings._parsed_args.func(settings)
    except Exception:
        Logger.error("Exception caught while parsing settings", exc_info=True)
        return 255

    try:
        return instance.run()
    except Exception:
        Logger.error("Exception caught in " + instance.__class__.__name__,
                     exc_info=True)
        return 254
    def setUp(self):
        """Create a temp working dir with data/ and a fresh git repository."""
        self.test_dir = tempfile.mkdtemp()
        self._old_curr_dir_abs = os.path.realpath(os.curdir)

        # NOTE(review): mkdtemp() already created test_dir, yet it is
        # mkdir'ed again below; presumably tearDown() removes it -- confirm.
        self.tearDown()
        os.mkdir(self.test_dir)
        os.chdir(self.test_dir)
        os.mkdir('data')

        # git's stdout is discarded; stderr is left attached to the parent's.
        self._devnull = open(os.devnull, 'w')
        git_init = subprocess.Popen(['git', 'init'],
                                    stdout=self._devnull,
                                    stderr=None)
        git_init.wait()

        git = GitWrapperI(self.test_dir)
        config = ConfigI('data', 'cache', 'state')
        self.git = git
        self.config = config
        self.path_factory = PathFactory(git, config)
        self.settings = Settings([], git, config)
Пример #10
0
def main():
    """Parse the command line, run the selected command, return its status.

    Returns 255 when building the settings or the command instance fails,
    254 when the command's run_cmd() raises, and run_cmd()'s return value
    otherwise. Tracebacks are attached to the log only in verbose mode.
    """
    try:
        settings = Settings(sys.argv[1:])
        instance = settings._parsed_args.func(settings)
    except Exception as err:
        # In case we didn't even manage to parse options
        verbose = any(flag in sys.argv for flag in ('-v', '--verbose'))
        Logger.error("Settings error: {}".format(err), exc_info=verbose)
        return 255

    try:
        return instance.run_cmd()
    except Exception as err:
        verbose = settings.parsed_args.verbose
        message = "{} error: {}".format(instance.__class__.__name__, err)
        Logger.error(message, exc_info=verbose)
        return 254
Пример #11
0
    def setUp(self):
        """Create a temp repo with data/, cache/ and state/ directories."""
        self.test_dir = System.get_long_path(tempfile.mkdtemp())
        self._old_curr_dir_abs = System.realpath(os.curdir)

        # NOTE(review): mkdtemp() already created test_dir, yet it is
        # mkdir'ed again below; presumably tearDown() removes it -- confirm.
        self.tearDown()
        os.mkdir(self.test_dir)
        os.chdir(self.test_dir)
        for name in ('data', 'cache', 'state'):
            os.mkdir(name)

        self.init_git_repo()
        git = GitWrapper()
        config = ConfigI('data', 'cache', 'state')

        self.git = git
        self.config = config
        self.path_factory = PathFactory(git, config)
        self.settings = Settings([], git, config)
Пример #12
0
    def setUp(self):
        """Create a temp repo with data/ plus the config, cache and state dirs."""
        self.test_dir = System.get_long_path(tempfile.mkdtemp())
        self._old_curr_dir_abs = System.realpath(os.curdir)

        # NOTE(review): mkdtemp() already created test_dir, yet it is
        # mkdir'ed again below; presumably tearDown() removes it -- confirm.
        self.tearDown()
        os.mkdir(self.test_dir)
        os.chdir(self.test_dir)

        os.mkdir('data')
        config_dir = ConfigI.CONFIG_DIR
        os.mkdir(config_dir)
        # Cache and state directories are nested inside the config directory.
        for nested in (ConfigI.CACHE_DIR_NAME, ConfigI.STATE_DIR_NAME):
            os.mkdir(os.path.join(config_dir, nested))

        self.init_git_repo()
        git = GitWrapper()
        config = ConfigI('data')

        self.git = git
        self.config = config
        self.path_factory = PathFactory(git, config)
        self.settings = Settings('run cmd', git, config)
Пример #13
0
class TestCmdTarget(BasicEnvironment):
    """Tests for CmdTarget: setting, replacing and unsetting ``.target``."""

    def setUp(self):
        """Prepare a test repository with two cache files symlinked into data.

        Creates ``cache/dir1/file1`` and ``cache/file2`` and symlinks them as
        ``data/dir1/file1`` and ``data/file2``. All of these paths are
        relative to ``self._test_git_dir``, which becomes the working
        directory.
        """
        self._test_dir = os.path.join(os.path.sep, 'tmp', 'dvc_unit_test')
        curr_dir = None
        commit = 'abc1234'

        # NOTE(review): presumably this builds the repo layout (including the
        # top-level data/ and cache/ dirs) and sets self._test_git_dir, which
        # is read below -- confirm against BasicEnvironment.
        BasicEnvironment.init_environment(self, self._test_dir, curr_dir)

        self._commit = commit

        self._git = GitWrapperI(git_dir=self._test_git_dir,
                                commit=self._commit)
        self._config = ConfigI('data', 'cache', 'state', '.target')
        self.path_factory = PathFactory(self._git, self._config)
        self.settings = Settings(['target'], self._git, self._config)

        os.chdir(self._test_git_dir)

        os.mkdir(os.path.join('data', 'dir1'))
        os.mkdir(os.path.join('cache', 'dir1'))

        self.file1_cache = os.path.join('cache', 'dir1', 'file1')
        self.file1_data = os.path.join('data', 'dir1', 'file1')

        # Close the handle explicitly so the content is flushed to disk
        # before the symlink is created.
        with open(self.file1_cache, 'w') as fd:
            fd.write('ddfdff')
        System.symlink(self.file1_cache, self.file1_data)

        self.file2_cache = os.path.join('cache', 'file2')
        self.file2_data = os.path.join('data', 'file2')

        with open(self.file2_cache, 'w') as fd:
            fd.write('ddfdff')
        System.symlink(self.file2_cache, self.file2_data)

    @staticmethod
    def _read_target():
        """Return the content of ``.target`` in the current working directory."""
        # Read through a context manager so the handle never outlives the call.
        with open('.target') as fd:
            return fd.read()

    def test_initial_default_target(self):
        """No ``.target`` file exists right after the environment is set up."""
        self.assertFalse(os.path.exists('.target'))

    def test_single_file(self):
        """Setting a target succeeds and writes the data path to ``.target``."""
        self.settings.parse_args('target {}'.format(self.file1_data))
        cmd = CmdTarget(self.settings)

        self.assertEqual(cmd.run(), 0)
        self.assertEqual(self._read_target(), self.file1_data)

    def test_multiple_files(self):
        """A second target replaces the first; ``--unset`` then empties the file."""
        self.settings.parse_args('target {}'.format(self.file1_data))
        cmd = CmdTarget(self.settings)
        cmd.run()

        # Another target
        self.settings.parse_args('target {}'.format(self.file2_data))
        cmd = CmdTarget(self.settings)
        self.assertEqual(cmd.run(), 0)
        self.assertEqual(self._read_target(), self.file2_data)

        # Unset target
        self.settings.parse_args('target --unset')
        cmd = CmdTarget(self.settings)
        self.assertEqual(cmd.run(), 0)
        self.assertEqual(self._read_target(), '')

    def test_initial_unset(self):
        """``--unset`` with no target set returns 1 and creates no file."""
        self.settings.parse_args('target --unset')
        cmd = CmdTarget(self.settings)
        self.assertEqual(cmd.run(), 1)
        self.assertFalse(os.path.exists('.target'))

    def test_unset_existing_target(self):
        """``--unset`` after setting a target returns 0 and empties ``.target``."""
        self.settings.parse_args('target {}'.format(self.file1_data))
        cmd = CmdTarget(self.settings)
        self.assertEqual(cmd.run(), 0)
        self.assertEqual(self._read_target(), self.file1_data)

        self.settings.parse_args('target --unset')
        cmd = CmdTarget(self.settings)
        self.assertEqual(cmd.run(), 0)
        self.assertEqual(self._read_target(), '')

    def test_the_same(self):
        """Running the same command object twice keeps the same target."""
        self.settings.parse_args('target {}'.format(self.file1_data))
        cmd = CmdTarget(self.settings)
        self.assertEqual(cmd.run(), 0)
        self.assertEqual(self._read_target(), self.file1_data)

        self.assertEqual(cmd.run(), 0)
        self.assertEqual(self._read_target(), self.file1_data)

    def test_args_conflict(self):
        """Passing both a target file and ``--unset`` returns 1."""
        self.settings.parse_args('target {} --unset'.format(self.file1_data))
        cmd = CmdTarget(self.settings)
        self.assertEqual(cmd.run(), 1)

    def test_no_args(self):
        """``--unset`` with no target set returns 1."""
        # NOTE(review): this parses exactly the same arguments as
        # test_initial_unset; the name suggests a bare 'target' invocation
        # was intended -- confirm before changing the arguments.
        self.settings.parse_args('target --unset')
        cmd = CmdTarget(self.settings)
        self.assertEqual(cmd.run(), 1)
Пример #14
0
def main():
    """Parse the command line, run the selected command and exit.

    The process exit status is the value returned by the command's run().
    """
    settings = Settings(sys.argv[1:])
    build_command = settings._parsed_args.func
    command = build_command(settings)
    sys.exit(command.run())
Пример #15
0
class TestCmdDataRemove(BasicEnvironment):
    """Tests for CmdGC cache garbage collection.

    Each data item is created with five cache files. After garbage
    collection, only one cache file per collected item is expected to remain.

    NOTE(review): the class name says "DataRemove" but every test exercises
    CmdGC -- presumably a copy/paste leftover; confirm before renaming.
    """

    def setUp(self):
        """Set up the environment, the settings for 'gc' and a dir1 subdir."""
        self._test_dir = os.path.join(os.path.sep, 'tmp', 'dvc_unit_test')
        curr_dir = None
        commit = 'abc1234'

        # NOTE(review): self._test_git_dir is read below but not assigned
        # here; presumably init_environment sets it -- confirm.
        BasicEnvironment.init_environment(self, self._test_dir, curr_dir)

        self._commit = commit

        self._git = GitWrapperI(git_dir=self._test_git_dir,
                                commit=self._commit)
        self._config = ConfigI('data', 'cache', 'state')
        self.path_factory = PathFactory(self._git, self._config)
        self.settings = Settings('gc', self._git, self._config)

        self.dir1 = 'dir1'

        self.create_dirs(self.dir1)

    def test_single_file(self):
        """gc_file on one data file leaves a single cache file for it."""
        os.chdir(self._test_git_dir)

        self.crate_data_item_with_five_caches('', 'file1.txt')
        self.assertEqual(5, self.cache_file_nums('file1*'))

        cmd = CmdGC(self.settings)
        # cache_file_nums() changed the working directory; go back to the root.
        os.chdir(self._test_git_dir)

        cmd.gc_file(os.path.join('data', 'file1.txt'))
        self.assertEqual(1, self.cache_file_nums('file1*'))

    def test_all(self):
        """gc_all with the 'data' target leaves one cache file per item."""
        os.chdir(self._test_git_dir)

        self.crate_data_item_with_five_caches('', 'file1.txt')
        self.crate_data_item_with_five_caches('', 'file2.txt')
        self.crate_data_item_with_five_caches(self.dir1, 'file3.txt')

        self.assertEqual(5, self.cache_file_nums('file1*'))
        self.assertEqual(5, self.cache_file_nums('file2*'))
        self.assertEqual(
            5, self.cache_file_nums(os.path.join(self.dir1, 'file3*')))

        self.settings.parse_args('gc --no-git-actions data')
        cmd = CmdGC(self.settings)
        # cache_file_nums() changed the working directory; go back to the root.
        os.chdir(self._test_git_dir)

        cmd.gc_all()

        self.assertEqual(1, self.cache_file_nums('file1*'))
        self.assertEqual(1, self.cache_file_nums('file2*'))
        self.assertEqual(
            1, self.cache_file_nums(os.path.join(self.dir1, 'file3*')))

    def test_empty(self):
        """gc_all without any target leaves all cache files in place."""
        os.chdir(self._test_git_dir)

        self.crate_data_item_with_five_caches('', 'file1.txt')
        self.crate_data_item_with_five_caches('', 'file2.txt')
        self.assertEqual(5, self.cache_file_nums('file1*'))
        self.assertEqual(5, self.cache_file_nums('file2*'))

        self.settings.parse_args('gc --no-git-actions')
        cmd = CmdGC(self.settings)
        # cache_file_nums() changed the working directory; go back to the root.
        os.chdir(self._test_git_dir)

        cmd.gc_all()

        self.assertEqual(5, self.cache_file_nums('file1*'))
        self.assertEqual(5, self.cache_file_nums('file2*'))

    def create_dirs(self, dir):
        """Create subdirectory ``dir`` under data/, cache/ and state/."""
        # The parameter name shadows the builtin but is kept for callers.
        for top in ('data', 'cache', 'state'):
            os.mkdir(os.path.join(self._test_git_dir, top, dir))

    def cache_file_nums(self, pattern):
        """Return how many paths in the cache dir match glob ``pattern``.

        Side effect: the working directory is left at the cache directory.
        """
        os.chdir(os.path.join(self._test_git_dir, 'cache'))
        return len(glob.glob(pattern))

    def crate_data_item_with_five_caches(self,
                                         dir,
                                         data_file,
                                         content='random text'):
        """Create a data item with a state file and five cache files.

        One cache file is suffixed with ``self._commit`` and is the one the
        data symlink points to; four more get fixed fake suffixes.

        Arguments:
            dir: subdirectory (may be '') under data/, cache/ and state/
            data_file: file name of the data item
            content: text written to the cache file of the current commit
        """
        os.chdir(self._test_git_dir)

        file_result = os.path.join('data', dir, data_file)

        state_result = os.path.join('state', dir,
                                    data_file + DataItem.STATE_FILE_SUFFIX)
        self.create_content_file(state_result, 'state content')

        file_prefix = data_file + DataItem.CACHE_FILE_SEP
        cache_result = os.path.join('cache', dir, file_prefix + self._commit)
        self.create_content_file(cache_result, content)

        # The symlink target is relative to the directory holding the link.
        up_path = self.relevant_dir(os.path.join(dir, data_file))
        cache_file = os.path.join(up_path, cache_result)

        d = os.path.join(self._test_git_dir, 'data', dir)
        os.chdir(d)
        print('---- CREATE SL {}: {} --> {}'.format(d, data_file, cache_file))
        System.symlink(cache_file, data_file)
        os.chdir(self._test_git_dir)

        data_item = self.settings.path_factory.existing_data_item(file_result)
        print('*DATA ITEM {}: {} {}'.format(file_result,
                                            data_item.data.relative,
                                            data_item.cache.relative))

        print('----> {} {} {} {}'.format(dir, data_file, cache_file,
                                         file_result))

        # Additional cache files
        for suffix in ('aaaaaaa', '1111111', '5555555', '123abff'):
            self.create_content_file(
                os.path.join('cache', dir, file_prefix + suffix))

    @staticmethod
    def create_content_file(file, content='some test'):
        """Write ``content`` to ``file``, creating or truncating it."""
        # The context manager closes the handle even if write() raises.
        with open(file, 'w+') as fd:
            fd.write(content)

    @staticmethod
    def relevant_dir(data_file):
        """Return a '..' chain one level longer than ``data_file`` is deep.

        'file.txt' gives '..' and 'dir1/file.txt' gives '../..' (with the
        platform's path separator).
        """
        depth = data_file.count(os.path.sep)
        return os.path.join(*(['..'] * (depth + 1)))