Пример #1
0
 def test_tmp_move(self):
     """Check that moving a target created with ``is_tmp=True`` relocates it.

     After ``move`` the source target must no longer exist and the
     destination target must exist.
     """
     tmp_target = hdfs.HdfsTarget(is_tmp=True)
     destination = hdfs.HdfsTarget(self._test_file())

     # Clear any leftover destination from an earlier run.
     if destination.exists():
         destination.remove(skip_trash=True)

     # Opening for write and closing immediately creates an empty file.
     with tmp_target.open('w'):
         pass
     self.assertTrue(tmp_target.exists())

     tmp_target.move(destination.path)

     self.assertFalse(tmp_target.exists())
     self.assertTrue(destination.exists())
Пример #2
0
    def test_flag_target(self):
        """Check that a flag target exists once ``_SUCCESS`` is written under it.

        The directory is removed first (if present) and asserted absent; then
        a ``part-00000`` file and a ``_SUCCESS`` file are written below it.
        """
        # NOTE(review): ``format`` is not bound inside this method; it resolves
        # to a module-level name or the builtin -- confirm against the file.
        flag_target = hdfs.HdfsFlagTarget("/some/dir/", format=format)
        if flag_target.exists():
            flag_target.remove(skip_trash=True)
        self.assertFalse(flag_target.exists())

        data_part = hdfs.HdfsTarget(flag_target.path + "part-00000", format=format)
        with data_part.open('w'):
            pass

        success_marker = hdfs.HdfsTarget(flag_target.path + "_SUCCESS", format=format)
        with success_marker.open('w'):
            pass

        self.assertTrue(flag_target.exists())
Пример #3
0
    def test_rename_no_grandparent(self):
        """Check that ``move`` works when the destination's grandparent is absent.

        The ``<test dir>/foo`` directory is removed up front, so neither
        ``foo`` nor ``foo/bar`` exists when the move to ``foo/bar/baz`` runs.
        """
        missing_root = self._test_dir() + '/foo'
        if self.fs.exists(missing_root):
            self.fs.remove(missing_root, skip_trash=True)

        source = hdfs.HdfsTarget(is_tmp=True)
        destination = hdfs.HdfsTarget(missing_root + '/bar/baz')

        # Create an empty source file to move.
        with source.open('w'):
            pass
        self.assertTrue(source.exists())

        source.move(destination.path)

        self.assertFalse(source.exists())
        self.assertTrue(destination.exists())
Пример #4
0
    def put_file(self, local_target, local_filename, target_path, delpath=True):
        """Recreate a local file, upload it, and return a target for the upload.

        :param local_target: local target; removed if present, then recreated
            through ``self.create_file``.
        :param local_filename: file name appended to ``target_path`` to build
            the path of the returned target.
        :param target_path: destination path handed to ``self.fs.put``.
        :param delpath: when true, remove ``target_path`` if it exists and
            recreate it with ``self.fs.mkdir`` before uploading.
        :return: an ``HdfsTarget`` for ``target_path + "/" + local_filename``.
        """
        # Always start from a freshly created local file.
        if local_target.exists():
            local_target.remove()
        self.create_file(local_target)

        if delpath:
            remote_dir = hdfs.HdfsTarget(target_path)
            if remote_dir.exists():
                remote_dir.remove(skip_trash=True)
            self.fs.mkdir(remote_dir.path)

        self.fs.put(local_target.path, target_path)
        return hdfs.HdfsTarget(target_path + "/" + local_filename)
Пример #5
0
    def test_multifile(self):
        """Check that reading ``self.target`` yields only the visible parts.

        Three files are written: through ``self.target`` itself, to ``data2``
        and to ``_SUCCESS`` below its path.  The read-back is expected to
        contain ``foo`` and ``bar`` but not the ``_SUCCESS`` payload.
        """
        with self.target.open('w') as out:
            out.write(b'foo\n')

        extra_part = hdfs.HdfsTarget(self.target.path + '/data2', format=hdfs.Plain)
        with extra_part.open('w') as out:
            out.write(b'bar\n')

        success_marker = hdfs.HdfsTarget(self.target.path + '/_SUCCESS', format=hdfs.Plain)
        with success_marker.open('w') as out:
            out.write(b'b0rk\n')

        self.assertTrue(extra_part.exists())
        self.assertTrue(success_marker.exists())
        self.assertTrue(self.target.exists())

        with self.target.open('r') as source:
            content = source.read()
        # Sort the lines so the comparison does not depend on read order.
        lines = sorted(content.strip(b'\n').split(b'\n'))
        self.assertEqual(tuple(lines), (b'bar', b'foo'))
Пример #6
0
class TestTask(luigi.Task):
    """Luigi task whose inputs are the files one level below ``/in`` on HDFS."""

    # NOTE(review): this default is computed once, when the class body is
    # executed, not each time the task is instantiated.
    rundate = luigi.DateParameter(default=datetime.now().date())
    table = "test"
    host = "localhost:3306"
    db = "testdb"
    user = "******"
    pw = "password"

    @staticmethod
    def _hdfs_ls(path):
        """Return the 8th column of every line printed by ``hdfs dfs -ls path``.

        Lines with fewer than eight whitespace-separated fields (such as a
        summary line) are skipped.  The command is started without a shell so
        that unusual characters in ``path`` cannot be interpreted as shell
        syntax.  stderr and the exit status are ignored, so a failed listing
        yields an empty list.

        :raises OSError: if the ``hdfs`` executable cannot be started.
        """
        proc = subprocess.Popen(
            ["hdfs", "dfs", "-ls", path],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,  # text output, so paths are str not bytes
        )
        listing, _ = proc.communicate()

        entries = []
        for line in listing.splitlines():
            fields = line.split()
            if len(fields) >= 8:
                entries.append(fields[7])
        return entries

    def input(self):
        """Return one ``HdfsTarget`` per file found in the subdirectories of ``/in``.

        Paths are expected to look like ``/in/<directory>/<filename>``: every
        entry listed directly under ``/in`` is itself listed, and each entry
        found there becomes an input target.
        """
        dir_in = '/in'
        file_paths = []
        for directory in self._hdfs_ls(dir_in):
            file_paths.extend(self._hdfs_ls(directory))

        return [hdfs.HdfsTarget(file_path) for file_path in file_paths]
Пример #7
0
 def test_create_ancestors(self):
     """Check that writing a target creates its missing ancestor directories.

     The test directory is removed first, then a file three levels below it
     is written; afterwards both the directory and the target must exist.
     """
     parent = self._test_dir()
     target = hdfs.HdfsTarget("%s/foo/bar/baz" % parent)
     if self.fs.exists(parent):
         self.fs.remove(parent, skip_trash=True)
     self.assertFalse(self.fs.exists(parent))

     # Use a context manager so the handle is not left open if write() raises.
     with target.open('w') as fobj:
         fobj.write('lol\n')

     self.assertTrue(self.fs.exists(parent))
     self.assertTrue(target.exists())
Пример #8
0
    def test_glob_exists(self):
        """Check ``glob_exists`` against the number of files matching a glob.

        Two ``part-0000*`` files and one unrelated file are written into a
        fresh directory; ``glob_exists`` must be true only for a count of 2.
        """
        base_dir = hdfs.HdfsTarget(self._test_dir())
        if base_dir.exists():
            base_dir.remove(skip_trash=True)
        self.fs.mkdir(base_dir.path)

        # (target, payload) pairs; only the first two match the glob below.
        outputs = [
            (hdfs.HdfsTarget(base_dir.path + "/part-00001"), 'foo\n'),
            (hdfs.HdfsTarget(base_dir.path + "/part-00002"), 'bar\n'),
            (hdfs.HdfsTarget(base_dir.path + "/another"), 'biz\n'),
        ]
        for out_target, payload in outputs:
            with out_target.open('w') as handle:
                handle.write(payload)

        matching = hdfs.HdfsTarget("%s/part-0000*" % base_dir.path)

        self.assertTrue(matching.glob_exists(2))
        self.assertFalse(matching.glob_exists(3))
        self.assertFalse(matching.glob_exists(1))
Пример #9
0
 def test_tmp_cleanup(self):
     """Check that a target created with ``is_tmp=True`` is gone after deletion.

     The target object is deleted and a garbage collection is forced; the
     path must then no longer exist on the filesystem.
     """
     import gc

     tmp_path = self._test_file()
     tmp_target = hdfs.HdfsTarget(tmp_path, is_tmp=True)
     if tmp_target.exists():
         tmp_target.remove(skip_trash=True)

     with tmp_target.open('w') as out:
         out.write('lol\n')
     self.assertTrue(tmp_target.exists())

     # Drop the only reference, then force a collection before checking.
     del tmp_target
     gc.collect()
     self.assertFalse(self.fs.exists(tmp_path))
Пример #10
0
 def tezt_rename_dont_move(self, client):
     """Exercise ``move_dir`` against the given filesystem client.

     Takes the client as a parameter because, per the original author, the
     code paths differ considerably between the three kinds of clients.

     ``move_dir`` onto the existing ``d/b`` must return a false value and
     leave ``d`` untouched; onto the new ``d/c`` it must return a true value
     and ``d`` must still hold two entries.
     """
     def entry_count():
         # Number of entries currently listed directly under 'd'.
         return len(list(client.listdir('d')))

     if client.exists('d'):
         client.remove('d')
     client.mkdir('d/a')
     client.mkdir('d/b')
     self.assertEqual(2, entry_count())

     source_dir = hdfs.HdfsTarget('d/a', fs=client)

     self.assertFalse(source_dir.move_dir('d/b'))
     self.assertEqual(2, entry_count())

     self.assertTrue(source_dir.move_dir('d/c'))
     self.assertEqual(2, entry_count())
Пример #11
0
    def test_slow_exists(self):
        """Check ``self.fs.exists`` for absent, present and unreachable paths.

        A missing file must report false, a freshly created one true, and
        paths on the two bogus ``hdfs://`` hosts must raise ``HDFSCliError``.
        """
        target = hdfs.HdfsTarget(self._test_file())
        try:
            target.remove(skip_trash=True)
        except Exception:
            # Best-effort cleanup: removal may fail when the file is absent.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            pass

        self.assertFalse(self.fs.exists(target.path))
        target.open("w").close()
        self.assertTrue(self.fs.exists(target.path))

        self.assertRaises(hdfs.HDFSCliError,
                          self.fs.exists, "hdfs://doesnotexist/foo")
        self.assertRaises(hdfs.HDFSCliError,
                          self.fs.exists, "hdfs://_doesnotexist_/foo")
Пример #12
0
 def test_pickle(self):
     """Check that an ``HdfsTarget`` can be pickled without raising."""
     target = hdfs.HdfsTarget("/tmp/dir")
     # Only serialisation is exercised; the result is deliberately discarded.
     pickle.dumps(target)
Пример #13
0
 def test_luigi_tmp(self):
     """Check that an ``is_tmp=True`` target is absent until it is written."""
     tmp_target = hdfs.HdfsTarget(is_tmp=True)
     self.assertFalse(tmp_target.exists())

     # Opening for write and closing immediately creates an empty file.
     with tmp_target.open('w'):
         pass

     self.assertTrue(tmp_target.exists())
Пример #14
0
 def create_target(self, format=None):
     """Return an ``HdfsTarget`` for the test file, removing any existing file.

     :param format: passed through unchanged as ``HdfsTarget``'s ``format``.
     :return: the target, whose path did not exist when last checked here.
     """
     fresh_target = hdfs.HdfsTarget(self._test_file(), format=format)
     if fresh_target.exists():
         fresh_target.remove(skip_trash=True)
     return fresh_target
Пример #15
0
 def setUp(self):
     """Run the parent setup, then bind a clean ``self.target`` for the test.

     The target points at the test file and uses ``self.format``; any file
     already present at that path is removed.
     """
     super(ComplexOldFormatTest, self).setUp()
     self.target = target = hdfs.HdfsTarget(self._test_file(), format=self.format)
     if target.exists():
         target.remove(skip_trash=True)