Пример #1
0
    def test_sftp(self, cleanup, signup, testcfg):
        """Exercise an SFTP pipe: create it, check remote paths, push/pull one CSV, clean up."""
        sftp_pipe_name = cfg_settings_pipe_sftp['name']
        client = getapi(testcfg.get('local', False))

        # Quick-create both pipes, parent settings first.
        for pipe_settings in (cfg_settings_parent_sftp, cfg_settings_pipe_sftp):
            d6tpipe.upsert_pipe(client, pipe_settings)

        # The parent is expected at the remote root, the child under its own
        # configured directory (with a trailing slash).
        _, parent_meta = client.cnxn.pipes._(
            cfg_settings_parent_sftp['name']).get()
        assert parent_meta['options']['remotepath'] == '/'
        _, child_meta = client.cnxn.pipes._(
            cfg_settings_pipe_sftp['name']).get()
        expected_remote = cfg_settings_pipe_sftp['options']['dir'] + '/'
        assert child_meta['options']['remotepath'] == expected_remote

        # Push/pull round trip, starting from an empty remote.
        sftp_pipe = getpipe(client, name=sftp_pipe_name, mode='all')
        sftp_pipe.delete_files_remote(confirm=False)

        csv_name = 'test.csv'
        frame = pd.DataFrame({'a': range(10)})
        frame.to_csv(sftp_pipe.dirpath / csv_name, index=False)
        assert sftp_pipe.scan_remote(cached=False) == []
        assert sftp_pipe.pull() == []
        assert sftp_pipe.push_preview() == [csv_name]
        assert sftp_pipe.push() == [csv_name]
        # Drop the cached remote scan before pulling again.
        sftp_pipe._cache_scan.clear()
        assert sftp_pipe.pull() == [csv_name]

        # Cleanup: remote first (and verify it is empty), then local.
        sftp_pipe.delete_files_remote(confirm=False)
        assert sftp_pipe.scan_remote(cached=False) == []
        sftp_pipe.delete_files_local(confirm=False, delete_all=True)
Пример #2
0
    def test_ftp(self, cleanup, signup, testcfg):
        """Create the FTP test pipe from a JSON credentials file and round-trip one CSV."""
        client = getapi(testcfg.get('local', False))

        # Quick-create from the JSON file; 'pipe-test-ftp' is presumably the
        # entry to load from it.
        d6tpipe.upsert_pipe_json(
            client, 'tests/.creds-test.json', 'pipe-test-ftp')
        ftp_pipe_name = 'test-ftp'

        # The created pipe is expected to point at '/utest/' on the remote.
        _, pipe_meta = client.cnxn.pipes._(ftp_pipe_name).get()
        assert pipe_meta['options']['remotepath'] == '/utest/'

        # Push/pull round trip with a single small CSV.
        ftp_pipe = getpipe(client, name=ftp_pipe_name, mode='all')
        csv_name = 'test.csv'
        frame = pd.DataFrame({'a': range(10)})
        frame.to_csv(ftp_pipe.dirpath / csv_name, index=False)
        assert ftp_pipe.scan_remote(cached=False) == []
        assert ftp_pipe.pull() == []
        assert ftp_pipe.push_preview() == [csv_name]
        assert ftp_pipe.push() == [csv_name]
        # Drop the cached remote scan before pulling again.
        ftp_pipe._cache_scan.clear()
        assert ftp_pipe.pull() == [csv_name]

        # Cleanup, then confirm the remote is empty again.
        ftp_pipe.delete_files(confirm=False, all_local=True)
        assert ftp_pipe.scan_remote(cached=False) == []
Пример #3
0
 def test_ftp_base(self, cleanup, signup, setup_ftp_base, testcfg):
     """Check that the base FTP pipe lists non-rooted names and pulls every listed file.

     ``setup_ftp_base`` supplies the pipe under test.
     """
     pipe = setup_ftp_base
     files = pipe.scan_remote()
     # Fail with a readable assertion instead of an IndexError on an empty listing.
     assert files, 'scan_remote() returned no files'
     # No remote name may start at the root dir; check every entry, not just the first.
     rooted = [name for name in files if name.startswith('/')]
     assert rooted == [], 'remote names must not start with "/"'
     files2 = pipe.pull()
     assert len(files2) == len(files)
     assert len(pipe.scan_local()) == len(files)
Пример #4
0
    def test_intro_stat_learning(self, cleanup, signup, testcfg):
        """Import the intro-stat-learning sample files locally and, unless running
        in local mode, publish them and pull them back with a second account.
        """
        islr_pipe_name = cfg_settings_islr['name']
        expected_files = [
            'Advertising.csv',
            'Advertising2.csv',
            'Auto.csv',
            'Ch10Ex11.csv',
            'College.csv',
            'Credit.csv',
            'Heart.csv',
            'Income1.csv',
            'Income2.csv',
            'LICENSE.md',
            'README.md',
        ]

        def open_local_pipe():
            # Fresh PipeLocal handle for the ISLR pipe under the test profile.
            return d6tpipe.PipeLocal(islr_pipe_name,
                                     profile=cfg_profile,
                                     filecfg=cfg_cfgfname)

        # Start with the local repo: import the sample directory.
        local_pipe = open_local_pipe()
        local_pipe.delete_files_local(confirm=False, delete_all=False)
        local_pipe.import_dir('tests/intro-stat-learning/')
        assert local_pipe.scan_local() == expected_files
        assert local_pipe.files() == []
        assert local_pipe.files(fromdb=False) == expected_files

        advertising = pd.read_csv(local_pipe.dirpath / 'Advertising.csv')
        assert not advertising.empty

        remote_enabled = not testcfg.get('local', False)
        if remote_enabled:
            # Set up the public repo and grant read access.
            owner_api = getapi()
            d6tpipe.upsert_pipe(owner_api, cfg_settings_islr)
            read_grant = {"username": '******', "role": "read"}
            d6tpipe.upsert_permissions(owner_api, islr_pipe_name, read_grant)

            remote_pipe = d6tpipe.Pipe(owner_api, islr_pipe_name, mode='all')
            remote_pipe.delete_files_remote(confirm=False)
            assert remote_pipe.scan_remote(cached=False) == []
            assert remote_pipe.push() == expected_files

            # Re-open the local pipe; it is expected to carry a schema now.
            local_pipe = open_local_pipe()
            assert len(local_pipe.schema) > 0

            # Pull the same files through the second API handle.
            reader_api = getapi2()
            remote_pipe = d6tpipe.Pipe(reader_api, islr_pipe_name)
            remote_pipe.delete_files_local(confirm=False, delete_all=False)
            assert remote_pipe.pull() == expected_files

            # The pipe schema supplies the reader keyword arguments.
            advertising = pd.read_csv(remote_pipe.dirpath / 'Advertising.csv',
                                      **remote_pipe.schema['pandas'])
            assert not advertising.empty

            import dask.dataframe as dd
            advertising_paths = remote_pipe.filepaths(
                include='Advertising*.csv')
            dask_frame = dd.read_csv(advertising_paths,
                                     **remote_pipe.schema['dask'])
            assert not dask_frame.compute().empty
            remote_pipe.delete_files_local(confirm=False, delete_all=False)

        # Cleanup of whichever local pipe handle is current.
        local_pipe.delete_files_local(confirm=False, delete_all=True)
Пример #5
0
    def test_pipes_pull(self, cleanup, signup, parentinit, pipeinit, testcfg):
        """Pull the test pipe's files and check listings, CRCs, local access and permissions."""
        is_local = testcfg.get('local', False)
        api = getapi(is_local)
        pipe = getpipe(api)
        assert pipe.name in api.list_pipes()

        expected_crcs = [
            '8a9782e9efa8befa9752045ca506a62e',
            '5fe579d6b71031dad399e8d4ea82820b',
            '4c7da169df85253d7ff737dde1e7400b',
            'ca62a122993494e763fd1676cce95e76',
        ]

        # Nothing pulled yet: no local files, but the remote listing is complete.
        assert pipe.files() == []
        assert pipe.scan_remote() == cfg_filenames_chk
        _, remote_attrs = pipe.scan_remote(attributes=True)
        assert _filenames(remote_attrs) == cfg_filenames_chk
        assert [entry['crc'] for entry in remote_attrs] == expected_crcs

        # A pull brings everything down; a second preview has nothing left.
        assert api.list_local_pipes() == []
        assert pipe.pull_preview() == cfg_filenames_chk
        assert pipe.pull() == cfg_filenames_chk
        assert pipe.pull_preview() == []
        assert api.list_local_pipes() == [pipe.name]

        # File listings, as pathlib paths and as plain strings.
        assert pipe.files() == cfg_filenames_chk
        pulled_paths = pipe.filepaths()
        assert pulled_paths == [
            Path(pipe.dirpath) / name for name in pipe.files()
        ]
        pulled_strings = pipe.filepaths(aspathlib=False)
        assert pulled_strings == [
            str(Path(pipe.dirpath) / name) for name in pipe.files()
        ]

        # With mode='all' the preview lists every file again.
        pipe = getpipe(api, chk_empty=False, mode='all')
        assert pipe.pull_preview() == cfg_filenames_chk

        # PipeLocal sees the same files and schema.
        local_pipe = d6tpipe.PipeLocal(pipe.name,
                                       profile=cfg_profile,
                                       filecfg=cfg_cfgfname)
        assert local_pipe.files() == cfg_filenames_chk
        assert local_pipe.scan_local() == cfg_filenames_chk
        assert local_pipe.schema == cfg_settings_pipe['schema']
        # Read the first file with the schema's pandas kwargs; result is unused.
        pd.read_csv(pipe.dirpath / cfg_filenames_chk[0],
                    **pipe.schema['pandas'])

        # Permissions: second user is rejected until granted read on the parent.
        if not is_local:
            other_api = getapi2(is_local)
            with pytest.raises(APIError, match='403'):
                getpipe(other_api, name=cfg_pipe_name, mode='all').pull()

            read_grant = {"username": cfg_usr2, "role": "read"}
            _response, _payload = d6tpipe.upsert_permissions(
                api, cfg_parent_name, read_grant)

            other_pipe = getpipe(other_api, name=cfg_pipe_name, mode='all')
            assert other_pipe.pull() == cfg_filenames_chk

        # Cleanup.
        pipe.delete_files_local(confirm=False, delete_all=True)
Пример #6
0
 def pull(self):
     """Pull the pipe named by ``cfg_pipe_name``, yield, then delete its local files.

     Generator: the code before ``yield`` is setup, the code after it runs
     when the generator is resumed.
     NOTE(review): presumably used as a pytest fixture; the decorator is not
     visible here, so confirm.
     """
     local_api = getapi(local=True)
     pulled_pipe = d6tpipe.pipe.Pipe(local_api, cfg_pipe_name)
     pulled_pipe.pull()
     yield True
     # Teardown: remove everything that was pulled.
     pulled_pipe.delete_files_local(confirm=False, delete_all=True)
Пример #7
0
    def test_d6tfree(self, cleanup, signup, testcfg):
        """Exercise a quick-created pipe: credentials, push/pull, and read/write permissions.

        Does nothing when the test configuration is in local mode.
        """
        if testcfg.get('local', False):
            return

        free_pipe_name = 'utest-d6tfree'
        owner_api = getapi(testcfg.get('local', False))

        def fetch_credentials(role):
            # Second element of the credentials response for the given role.
            return owner_api.cnxn.pipes._(free_pipe_name).credentials.get(
                query_params={'role': role})[1]

        # Quick create with only a name; expect an s3 pipe whose remote path
        # contains both the user name and the pipe name.
        d6tpipe.upsert_pipe(owner_api, {'name': free_pipe_name})
        _, pipe_meta = owner_api.cnxn.pipes._(free_pipe_name).get()
        remote_path = pipe_meta['options']['remotepath']
        assert cfg_usr in remote_path
        assert free_pipe_name in remote_path
        assert pipe_meta['protocol'] == 's3'

        # Read and write credentials both carry a session token and differ.
        read_creds = fetch_credentials('read')
        write_creds = fetch_credentials('write')
        assert "aws_session_token" in read_creds
        assert "aws_session_token" in write_creds
        assert read_creds['aws_access_key_id'] != write_creds[
            'aws_access_key_id']

        # Forced renewal is expected to hand out a different read key.
        owner_pipe = getpipe(owner_api, name=free_pipe_name, mode='all')
        owner_pipe._reset_credentials()
        renewed_read_creds = fetch_credentials('read')
        assert renewed_read_creds['aws_access_key_id'] != read_creds[
            'aws_access_key_id']

        # Push/pull with a file inside a subfolder.
        first_csv = 'folder/test.csv'
        second_csv = 'folder/test2.csv'
        owner_pipe.delete_files_remote(confirm=False)

        frame = pd.DataFrame({'a': range(10)})
        (owner_pipe.dirpath / first_csv).parent.mkdir(exist_ok=True)
        frame.to_csv(owner_pipe.dirpath / first_csv, index=False)
        assert owner_pipe.push() == [first_csv]
        # Drop the cached remote scan before pulling again.
        owner_pipe._cache_scan.clear()
        assert owner_pipe.pull() == [first_csv]

        # Permissions - no access: the second user gets a 403.
        guest_api = getapi2(testcfg.get('local', False))
        with pytest.raises(APIError, match='403'):
            getpipe(guest_api, name=free_pipe_name, mode='all').pull()

        # Permissions - read: pull works, push is refused.
        read_grant = {"username": cfg_usr2, "role": "read"}
        d6tpipe.upsert_permissions(owner_api, free_pipe_name, read_grant)

        guest_pipe = getpipe(guest_api, name=free_pipe_name, mode='all')
        assert guest_pipe.role == 'read'
        assert guest_pipe.pull() == [first_csv]

        frame.to_csv(guest_pipe.dirpath / second_csv, index=False)
        with pytest.raises(ValueError, match='Read-only'):
            guest_pipe.push()

        # Permissions - write: both files can now be pushed.
        write_grant = {"username": cfg_usr2, "role": "write"}
        d6tpipe.upsert_permissions(owner_api, free_pipe_name, write_grant)

        guest_pipe = getpipe(guest_api,
                             name=free_pipe_name,
                             mode='all',
                             chk_empty=False)
        assert guest_pipe.role == 'write'
        assert guest_pipe.pull() == [first_csv]
        assert guest_pipe.push() == [first_csv, second_csv]

        # TODO: check there is no access to parent paths in s3.
        # TODO: cover file include patterns, e.g. child pipes of 'demo-vendor'
        #       ('demo-vendor-daily' / 'demo-vendor-monthly') created with
        #       options {'include': '*daily*.csv'} / {'include': '*monthly*.csv'}.

        # Cleanup: remote first (and verify it is empty), then local.
        owner_pipe.delete_files_remote(confirm=False)
        assert owner_pipe.scan_remote(cached=False) == []
        owner_pipe.delete_files_local(confirm=False, delete_all=True)
Пример #8
0
    def test_pipes_push(self, cleanup, signup, parentinit, pipeinit, testcfg):
        """Check push previews, pushes, file filters, CRC detection, the ``files``
        parameter and orphan removal on the test pipe.
        """
        api = getapi(testcfg.get('local', False))

        # Start from an empty local directory.
        push_pipe = getpipe(api, chk_empty=False)
        push_pipe.delete_files_local(confirm=False, delete_all=True)
        assert push_pipe.scan_local() == []

        # push_preview is expected to raise PushError here (nothing pulled yet).
        push_pipe = getpipe(api)
        with pytest.raises(PushError):
            push_pipe.push_preview()
        push_pipe.pull()

        # Push works: a new local CSV is previewed, pushed, and then in sync.
        new_csv = 'test.csv'
        frame = pd.DataFrame({'a': range(10)})
        frame.to_csv(push_pipe.dirpath / new_csv, index=False)
        local_names = set(push_pipe.scan_local())
        assert local_names == set(cfg_filenames_chk + [new_csv])
        assert push_pipe.files() == cfg_filenames_chk
        assert push_pipe.push_preview() == [new_csv]
        assert push_pipe.push() == [new_csv]
        assert push_pipe.push_preview() == []
        push_pipe._cache_scan.clear()
        assert push_pipe.pull_preview() == []

        # A file that does not meet the pipe's pattern is not offered for push.
        unmatched_name = 'test.xlsx'
        unmatched_path = push_pipe.dirpath / unmatched_name
        frame.to_csv(unmatched_path)
        assert push_pipe.push_preview() == []
        unmatched_path.unlink()

        # TODO: push exclude

        # files() works, including include/exclude filters and sorting.
        assert push_pipe.files() == cfg_filenames_chk + [new_csv]
        assert push_pipe.files(include='Machine*.csv') == cfg_filenames_chk
        assert push_pipe.files(exclude='Machine*.csv') == [new_csv]
        assert push_pipe.files(sortby='mod')[-1] == new_csv

        # CRC works: rewriting the same content leaves nothing to push, while
        # writing with the index column is flagged as a change.
        reread = pd.read_csv(push_pipe.dirpath / new_csv,
                             **push_pipe.schema['pandas'])
        reread.to_csv(push_pipe.dirpath / new_csv, index=False)
        assert push_pipe.push_preview() == []
        frame.to_csv(push_pipe.dirpath / new_csv, index=True)
        assert push_pipe.push_preview() == [new_csv]

        # The ``files`` parameter restricts pull/delete/push to the given names.
        only_new = [new_csv]
        assert push_pipe.pull(files=only_new) == [new_csv]
        push_pipe.delete_files_remote(files=[new_csv], confirm=False)
        assert push_pipe._pullpush_luigi([new_csv], 'exists') == [False]
        assert push_pipe.push(files=[new_csv]) == [new_csv]
        assert push_pipe._pullpush_luigi([new_csv], 'exists') == [True]

        # remove_orphans works: with the local copy gone, the remote file is
        # reported on the dry run and removed on the real run.
        (push_pipe.dirpath / new_csv).unlink()
        push_pipe._cache_scan.clear()
        dry_run = push_pipe.remove_orphans(direction='both', dryrun=True)
        assert dry_run['remote'] == [new_csv]
        real_run = push_pipe.remove_orphans(direction='both', dryrun=False)
        assert real_run['remote'] == [new_csv]
        assert push_pipe._pullpush_luigi(['test.csv'], 'exists') == [False]

        # Cleanup.
        push_pipe.delete_files_local(confirm=False, delete_all=True)
        assert push_pipe.scan_local() == []

        # def test_pipes_includeexclude(self, cleanup, parentinit, pipeinit, testcfg):
        #     pass
        '''