Example #1
File: test_hdfs.py Project: epam/Merlin
 def test_delete_dir(self):
     local = LocalFS(os.path.dirname(os.path.realpath(__file__)))
     hdfs_file = HDFS(os.path.join("/tmp", str(uuid.uuid4())))
     local.copy_to_hdfs(hdfs_file.path)
     self.assertTrue(hdfs_file.exists(), "Target HDFS dir does not exist")
     hdfs_file.delete(recursive=True)
     self.assertFalse(hdfs_file.exists(), "Target HDFS dir was not deleted")
Example #2
 def download_dir(self,
                  path,
                  local_path,
                  predicate=lambda path, connector: True,
                  recursive=True):
     """
     Copies a remote directory (path) with files from the SFTP server
     to the local host into a local_path. In addition, copies inner directories
     if parameter 'recursive' is True. Filters file if predicate was given
     :param path: path to directory on ftp
     :param local_path: path to directory on local file system
     :param predicate: predicate for filter file
     :param recursive: copies all inner directory at the given path if is True
     :type path: str
     :type local_path: str
     :type recursive: bool
     """
     path = self.__normalize_path(path)
     self.__assert_exists(path)
     self.__assert_is_dir(path)
     local_path = self.__normalize_path(local_path)
     LocalFS(local_path).assert_exists()
     LocalFS(local_path).assert_is_dir()
     local_path = os.path.join(local_path, self.__get_name(path))
     LocalFS(local_path).create_directory()
     list_ = self.list_files(path)
     if predicate:
         list_ = [p for p in list_ if predicate(p, self)]
     for tmp in list_:
         if recursive and self.is_directory(tmp):
             self.download_dir(tmp, local_path, predicate, recursive)
         elif not self.is_directory(tmp):
             self.download_file(tmp, local_path)
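A minimal usage sketch for download_dir above. The sftp_client factory is hypothetical (named by analogy with the ftp_client factory that appears in the setup.py examples below); the predicate signature (path, connector) follows the docstring:

    # hypothetical factory; host and credentials are placeholders
    sftp = sftp_client(host="sftp.example.com", login="user", password="secret")
    # copy /data and its subdirectories into /tmp, keeping only .csv files
    sftp.download_dir("/data", "/tmp",
                      predicate=lambda path, connector: path.endswith(".csv"),
                      recursive=True)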
Example #3
File: ftp.py Project: epam/Merlin
    def download_file(self, path, local_path):
        """
        Copies a remote file (path) from the SFTP server
        to the local host as local_path
        :param path: path to the file on the SFTP server
        :param local_path: path to the future file or an existing directory
        :type path: str
        :type local_path: str
        """
        path = self.__normalize_path(path)
        self.__assert_exists(path)
        local_path = self.__normalize_path(local_path)
        self.__assert_is_not_dir(path)

        if LocalFS(local_path).exists():
            _local_dst = local_path if not LocalFS(local_path).is_directory() else \
                os.path.join(local_path, self.__get_name(path))
            self.__ftp_instance.get(remotepath=path, localpath=_local_dst)
        elif LocalFS(self.__get_basename(local_path)).exists():
            local = LocalFS(self.__get_basename(local_path))
            if local.is_directory():
                self.__ftp_instance.get(remotepath=path, localpath=local_path)
            else:
                raise FTPFileError("'{0}' is not directory".format(local_path))
        else:
            raise FTPFileError("'{0}' is not exists".format(local_path))
Example #4
File: test_hdfs.py Project: epam/Merlin
 def test_file_size(self):
     local = LocalFS(os.path.realpath(__file__))
     hdfs_file = HDFS(os.path.join("/tmp", str(uuid.uuid4())))
     try:
         local.copy_to_hdfs(hdfs_file.path)
         self.assertTrue(hdfs_file.exists(), "Local file was not copied to HDFS")
         self.assertEqual(hdfs_file.size(), local.size())
     finally:
         hdfs_file.delete()
Example #5
File: test_hdfs.py Project: epam/Merlin
 def test_dir_size(self):
     local_basedir = os.path.dirname(os.path.realpath(__file__))
     local = LocalFS(os.path.join(local_basedir, "resources", "test_dir_size"))
     hdfs_file = HDFS(os.path.join("/tmp", str(uuid.uuid4())))
     try:
         local.copy_to_hdfs(hdfs_file.path)
         self.assertTrue(hdfs_file.exists(), "Local file was not copied to HDFS")
         expected_fsize = local.size()
         self.assertEqual(hdfs_file.size(), expected_fsize)
     finally:
         hdfs_file.delete(recursive=True)
Example #6
 def _run_(self, application, test_id=None):
     # generate the id per call: a default of str(uuid.uuid4()) in the
     # signature would be evaluated only once and shared by all calls
     test_id = test_id or str(uuid.uuid4())
     basedir = LocalFS(os.path.join("/tmp", "test_spark", test_id))
     try:
         basedir.create_directory()
         _app_input = self.input_path
         _app_output_dir = os.path.join(basedir.path, "output")
         status = application.run('file:' + _app_input, 'file:' + _app_output_dir)
         self.assertTrue(status.is_ok(), status.stderr())
         self.assertTrue(os.path.exists(_app_output_dir), status.stderr())
     finally:
         basedir.delete_directory()
Example #7
File: flow.py Project: Mbaroudi/Merlin
def load_file_from_local_to_hdfs(context):
    context['new_pathes'] = []
    for _file in LocalFS(
            os.path.join(os.path.dirname(__file__), "resources/tmp")):
        HDFS("/tmp/raw/{0}".format(parser_partition(_file.path))) \
            .create(directory=True)
        LocalFS(os.path.join(os.path.dirname(__file__),
                             "resources/tmp/{0}").format(_file.path)) \
            .copy_to_hdfs(hdfs_path="/tmp/raw/{0}/".format(parser_partition(_file.path)))
        context['new_pathes'].append("/tmp/raw/{0}".format(
            parser_partition(_file.path)))
Example #8
 def __create_local_dir(self, path, local_path):
     """
     Creates local directory
     :param path: path to file on ftp
     :param local_path: path to file on local file system
     :return: path to the created local directory
     """
     LocalFS(local_path).assert_exists()
     names = os.path.basename(path)
     local_path = os.path.join(local_path, names)
     LocalFS(local_path).create_directory()
     return local_path
Example #9
 def upload(self, path, local_path, update=False):
     """
     Copies a local file (local_path) to the SFTP server as path.
     Updates exists file if parameter 'update' is True
     :param path: path to file on ftp
     :param local_path: path to file on local file system
     :param update: updates file on ftp if is True
     :type path: str
     :type local_path: str
     :type update: bool
     """
     path = self.__normalize_path(path)
     local_path = self.__normalize_path(local_path)
     LocalFS(local_path).assert_exists()
     if self.exists(path):
         if not self.is_directory(path):
             if update:
                 self.__ftp_instance.put(localpath=local_path,
                                         remotepath=path)
             else:
                 raise FTPFileError("'{0}' already exists".format(path))
         else:
             path_name = self.__get_name(local_path)
             self.__ftp_instance.put(localpath=local_path,
                                     remotepath=os.path.join(
                                         path, path_name))
     else:
         path_name = self.__get_basename(path)
         if self.exists(path_name):
             self.__ftp_instance.put(localpath=local_path, remotepath=path)
         else:
             raise FileNotFoundException(
                 "'{path}' does not exist".format(path=path_name))
Example #10
 def upload(self, path, local_path, update=False):
     """
     Copies a local file (local_path) to the SFTP server as path.
     Updates exists file if parameter 'update' is True
     :param path: path to file on ftp
     :param local_path: path to file on local file system
     :param update: updates file on ftp if is True
     :type path: str
     :type local_path: str
     :type update: bool
     """
     base_path = self.base_dir(path)
     LocalFS(local_path).assert_exists()
     if self.exists(path):
         if self.is_directory(path):
             name_files = os.path.basename(local_path)
             self.__copy_file_from_local(local_path,
                                         "/".join([path, name_files]))
         else:
             self.__assert_is_update(update)
             self.__copy_file_from_local(local_path, path)
     else:
         self.__assert_exists(base_path)
         name_files = path[len(base_path):]
         self.__copy_file_from_local(local_path,
                                     "/".join([base_path, name_files]))
Example #11
File: test_hdfs.py Project: epam/Merlin
 def test_merge(self):
     basedir = os.path.dirname(os.path.realpath(__file__))
     local = LocalFS(os.path.join(basedir, "resources", "test_merge"))
     hdfs_file = HDFS(os.path.join("/tmp", str(uuid.uuid4())))
     merged_file = LocalFS(os.path.join(basedir, "resources", "merged.txt"))
     try:
         local.copy_to_hdfs(hdfs_file.path)
         self.assertTrue(hdfs_file.exists(), "Local file was not copied to HDFS")
         hdfs_file.merge(merged_file.path)
         self.assertTrue(merged_file.exists(), "merged file was not copied to local fs")
     finally:
         hdfs_file.delete_directory()
         merged_file.delete()
Example #12
 def test_apply_local_fs_snapshot(self):
     _config_file = os.path.join(os.path.dirname(__file__),
                                 'resources',
                                 'bootsrap',
                                 'bootstrap.ini')
     test_dir = LocalFS('/tmp/data_tmp')
     if test_dir.exists():
         test_dir.delete_directory()
     try:
         metastore = IniFileMetaStore(file=_config_file)
         _config = Configuration.load(metastore)
         apply_localfs_snapshot(_config)
         self.assertTrue(test_dir.exists(), "Folder was not created")
     finally:
         test_dir.delete_directory()
Example #13
 def __copy_file_from_local(self, local_path, path, create_parents=False):
     """
     Copies file from local
     """
     LocalFS(local_path).assert_exists()
     base_dir = self.base_dir(path)
     if self.exists(base_dir):
         self.__ftp_driver.storbinary("STOR {0}".format(path),
                                      open(local_path, "rb"))
     else:
         self.__assert_recursive(create_parents)
         self.create_dir(base_dir)
         self.__ftp_driver.storbinary("STOR {0}".format(path),
                                      open(local_path, "rb"))
Example #14
File: test_kafka.py Project: epam/Merlin
 def test_broker(self):
     shell.execute_shell_command('fuser -k -n tcp {0}'.format(PORT))
     local = LocalFS("/tmp/kafka-test")
     if not local.exists():
         local.create_directory()
     thread = KafkaThreadBroker()
     thread.daemon = True
     thread.start()
     sleep(TIME)
     cmd = shell.execute_shell_command('netstat -lntu')
     self.assertTrue("9010" in cmd.stdout, cmd.stdout)
     local.delete_directory()
     shell.execute_shell_command('fuser -k -n tcp {0}'.format(PORT))
Example #15
 def download_file(self, path, local_path):
     """
     Copies a remote file (path) from the SFTP server
     to the local host as local_path
     :param path: path to file on ftp
     :param local_path: path to future file or existing directory
     :type path: str
     :type local_path: str
     """
     self.__assert_exists(path)
     LocalFS(local_path).assert_exists()
     self.__assert_is_not_dir(path)
     # stream the remote file into <local_path>/<basename(path)>
     with open(os.path.join(local_path, os.path.basename(path)),
               "w+b") as local_file:
         self.__ftp_driver.retrbinary("RETR {0}".format(path),
                                      local_file.write)
Example #16
    def test_mr_job_command_generation_with_arguments(self):
        _job_name = "test_mr_job_%s" % uuid.uuid4()

        _base_dir = HDFS(os.path.join("/tmp", _job_name))
        _base_dir.create_directory()
        try:
            jar = os.path.join(os.path.dirname(__file__), 'resources', 'mapreduce', 'hadoop-mapreduce-examples.jar')
            # configure job inputs
            _job_input = HDFS(os.path.join(_base_dir.path, "input"))
            _job_input.create_directory()
            LocalFS(os.path.join(
                os.path.dirname(__file__),
                'resources',
                'mapreduce', 'raw-data.txt')
            ).copy_to_hdfs(
                _job_input.path
            )

            # configure job output
            _job_output = HDFS(os.path.join(_base_dir.path, "output"))
            if not os.path.exists(jar):
                self.skipTest("'%s' not found" % jar)

            job = MapReduce.prepare_mapreduce_job(jar=jar,
                                                  main_class="wordcount",
                                                  name=_job_name) \
                .with_config_option("split.by", "'\\t'") \
                .with_number_of_reducers(3) \
                .with_arguments(
                _job_input.path,
                _job_output.path
            )
            _command_submission_result = job.run()
            _command_submission_result.if_failed_raise(AssertionError("Cannot run MR job"))
            _job_status = job.status()
            self.assertTrue(_job_status is not None and _job_status.is_succeeded(), "MR job Failed")
            self.assertTrue(_job_output.exists(), "Error: empty job output")
            # check counters
            self.assertEqual(6, _job_status.counter(group='File System Counters',
                                                    counter='HDFS: Number of write operations'))
            self.assertEqual(1, _job_status.counter(group='Job Counters', counter='Launched map tasks'))
            self.assertEqual(3, _job_status.counter(group='Job Counters', counter='Launched reduce tasks'))
            self.assertEqual(2168, _job_status.counter(group='File Input Format Counters', counter='Bytes Read'))
        finally:
            _base_dir.delete_directory()
Example #17
    def test_streaming_job_with_multiple_inputs(self):
        _job_basedir = HDFS(os.path.join("/tmp", str(uuid.uuid4())))
        try:

            job = self._template_streaming_job_(base_dir=_job_basedir.path)

            _additional_datasource = HDFS(os.path.join(_job_basedir.path, "input2"))
            _additional_datasource.create_directory()
            LocalFS(os.path.join(os.path.dirname(__file__), 'resources',
                                 'mapreduce', 'raw-data.txt')
            ).copy_to_hdfs(
                _additional_datasource.path)
            job.take(_additional_datasource.path)
            command_result = job.run()
            command_result.if_failed_raise(AssertionError("test_streaming_job_with_multiple_inputs test failed"))
            _job_status = job.status()
            self.assertTrue(_job_status is not None and _job_status.is_succeeded())
            # check counters
            self.assertEqual(740, _job_status.counter(group='Map-Reduce Framework', counter='Reduce input records'),
                             "counters['Map-Reduce Framework']['Reduce input records']")
        finally:
            _job_basedir.delete_directory()
Example #18
    def _template_streaming_job_(self, base_dir="/tmp", map_only_job=False):
        if not os.path.exists(HADOOP_STREAMING_JAR):
            self.skipTest("Cannot allocate %s" % HADOOP_STREAMING_JAR)
        _hdfs_basdir = HDFS(base_dir)
        if not _hdfs_basdir.exists():
            _hdfs_basdir.create_directory()
        _job_input = HDFS(os.path.join(_hdfs_basdir.path, "input"))
        _job_input.create_directory()
        _job_output = HDFS(os.path.join(_hdfs_basdir.path, "output"))
        home = os.path.dirname(__file__)
        _mapper = os.path.join(home, 'resources', 'mapreduce', 'mapper.py')
        _reducer = os.path.join(home, 'resources', 'mapreduce', 'reducer.py')

        LocalFS(
            os.path.join(os.path.dirname(__file__), 'resources', 'mapreduce', 'raw-data.txt')
        ).copy_to_hdfs(
            _job_input.path
        )

        return MapReduce.prepare_streaming_job(name="test-mr-streaming-job{}".format(str(uuid.uuid4())), jar=HADOOP_STREAMING_JAR) \
            .take(_job_input.path) \
            .process_with(mapper=_mapper, reducer=None if map_only_job else _reducer) \
            .save(_job_output.path)
Example #19
File: setup.py Project: Mbaroudi/Merlin
    if ftp.exists():
        ftp.delete(recursive=True)
    ftp.create(make_dir=True)

    # upload files to a directory on the FTP server
    ftp.upload(local_path=os.path.join(os.path.dirname(__file__),
                                       "resources/file_12.11.2014_.txt"))
    ftp.upload(local_path=os.path.join(os.path.dirname(__file__),
                                       "resources/file_13.11.2014_.txt"))
    ftp.upload(local_path=os.path.join(os.path.dirname(__file__),
                                       "resources/file_14.11.2014_.txt"))

    # upload files to HDFS / create directories
    hdfs_file = HDFS("{0}/raw/12.11.2014".format(BASE_DIR))
    hdfs_file.create(directory=True)
    local_file = LocalFS(path=os.path.join(os.path.dirname(__file__),
                                           'resources/file_12.11.2014_.txt'))
    local_file.copy_to_hdfs(hdfs_path="{0}/raw/12.11.2014".format(BASE_DIR))

    hdfs_file = HDFS("{0}/raw/13.11.2014".format(BASE_DIR))
    hdfs_file.create(directory=True)
    local_file = LocalFS(path=os.path.join(os.path.dirname(__file__),
                                           'resources/file_13.11.2014_.txt'))
    local_file.copy_to_hdfs(hdfs_path="{0}/raw/13.11.2014".format(BASE_DIR))

    # create empty local directory 'tmp' in folder 'resources'
    local_file = LocalFS(
        path=os.path.join(os.path.dirname(__file__), 'resources/tmp'))
    if local_file.exists():
        local_file.delete_directory()
    local_file.create(directory=True)
Example #20
File: flow.py Project: epam/Merlin
def on_flow_failed(context):
    local_file = LocalFS(path=os.path.join(os.path.dirname(__file__),
                                           'resources/tmp'))
    if local_file.exists():
        local_file.delete_directory()
Example #21
File: flow.py Project: Mbaroudi/Merlin
def on_flow_failed(context):
    local_file = LocalFS(
        path=os.path.join(os.path.dirname(__file__), 'resources/tmp'))
    if local_file.exists():
        local_file.delete_directory()
Example #22
File: setup.py Project: epam/Merlin

import os

from merlin.fs.hdfs import HDFS
from merlin.fs.localfs import LocalFS


if __name__ == "__main__":
    _basedir = HDFS(os.path.join('/tmp', 'scd.active'))

    _basedir.create_directory()
    _scd_active_snapshot = LocalFS(os.path.join(os.path.dirname(__file__), 'resources', 'scd.active.csv'))
    _scd_active_snapshot.copy_to_hdfs(_basedir.path)

Example #23
File: setup.py Project: epam/Merlin
                     password=password,
                     path=path)

    if ftp.exists():
        ftp.delete(recursive=True)
    ftp.create(make_dir=True)

    # upload file to directory on FTP
    ftp.upload(local_path=os.path.join(os.path.dirname(__file__), "resources/file_12.11.2014_.txt"))
    ftp.upload(local_path=os.path.join(os.path.dirname(__file__), "resources/file_13.11.2014_.txt"))
    ftp.upload(local_path=os.path.join(os.path.dirname(__file__), "resources/file_14.11.2014_.txt"))

    # upload files to HDFS / create directories
    hdfs_file = HDFS("{0}/raw/12.11.2014".format(BASE_DIR))
    hdfs_file.create(directory=True)
    local_file = LocalFS(path=os.path.join(os.path.dirname(__file__),
                                           'resources/file_12.11.2014_.txt'))
    local_file.copy_to_hdfs(hdfs_path="{0}/raw/12.11.2014".format(BASE_DIR))

    hdfs_file = HDFS("{0}/raw/13.11.2014".format(BASE_DIR))
    hdfs_file.create(directory=True)
    local_file = LocalFS(path=os.path.join(os.path.dirname(__file__),
                                           'resources/file_13.11.2014_.txt'))
    local_file.copy_to_hdfs(hdfs_path="{0}/raw/13.11.2014".format(BASE_DIR))

    # create empty local directory 'tmp' in folder 'resources'
    local_file = LocalFS(path=os.path.join(os.path.dirname(__file__),
                                           'resources/tmp'))
    if local_file.exists():
        local_file.delete_directory()
    local_file.create(directory=True)
Example #24
File: setup.py Project: Mbaroudi/Merlin

import os

from merlin.fs.hdfs import HDFS
from merlin.fs.localfs import LocalFS

if __name__ == "__main__":
    _basedir = HDFS(os.path.join('/tmp', 'scd.active'))

    _basedir.create_directory()
    _scd_active_snapshot = LocalFS(
        os.path.join(os.path.dirname(__file__), 'resources', 'scd.active.csv'))
    _scd_active_snapshot.copy_to_hdfs(_basedir.path)
Example #25
def upload_to_hdfs_updates(context):
    LocalFS(_scd_updates).copy_to_hdfs(_hdfs_tmpdir.path)
Example #26
import os

from merlin.fs.localfs import LocalFS
from merlin.fs.hdfs import HDFS

BASE_DIR = "/tmp"


def clean_resources():
    """
    Cleans up resources left over from a previous flow run.
    """
    hdfs_file = HDFS("{0}/data_to_export".format(BASE_DIR))
    if hdfs_file.exists():
        hdfs_file.delete(recursive=True)

    hdfs_file = HDFS("{0}/data_from_import".format(BASE_DIR))
    if hdfs_file.exists():
        hdfs_file.delete(recursive=True)

    hdfs_file = HDFS("{0}/rdbms.password".format(BASE_DIR))
    if hdfs_file.exists():
        hdfs_file.delete()


if __name__ == "__main__":
    clean_resources()

    # Copy the configuration files from the 'resources' folder to HDFS
    LocalFS(path=os.path.join(os.path.dirname(__file__),
                              'resources/rdbms.password')).copy_to_hdfs(
                                  hdfs_path=BASE_DIR)
Example #27
def apply_localfs_snapshot(config):
    """Creates initial directory structure on local file system"""
    _localfs_snapshot = FsSnapshot.load_from_config(
        config, fs_section=CONFIG_LOCAL_FS_DIRS_KEY)
    _localfs_snapshot.apply(
        mkdir_command=lambda path: LocalFS(path).create_directory())
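A usage sketch, following the pattern of test_apply_local_fs_snapshot in Example #12; the ini file name is illustrative:

    metastore = IniFileMetaStore(file='bootstrap.ini')  # illustrative file name
    config = Configuration.load(metastore)
    apply_localfs_snapshot(config)  # creates every directory listed in the snapshot section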
Example #28
if __name__ == "__main__":

    hdfs_file = HDFS("{0}/raw".format(BASE_DIR))
    if hdfs_file.exists():
        hdfs_file.delete(recursive=True)

    config = RawConfigParser()
    config.read(os.path.join(os.path.dirname(__file__), "resources/ftp_config.ini"))
    host_download = config.get("ftp", "host.download")
    user_name = config.get("ftp", "user.name")
    password = config.get("ftp", "password")
    path = config.get("ftp", "path")
    ftp = ftp_client(host=host_download,
                     login=user_name,
                     password=password,
                     path="/tmp")

    if ftp.exists():
        ftp.delete(recursive=True)

    local_file = LocalFS(path=os.path.join(os.path.dirname(__file__),
                                           'resources/tmp'))
    if local_file.exists():
        local_file.delete_directory()

    hive = Hive.load_queries_from_string(query="DROP DATABASE IF EXISTS hive_monitoring CASCADE;")
    hive.run()


Example #29
        with open('resources/step', 'w') as step_file:
            step_file.write(action_name)


if __name__ == '__main__':
    log = get_logger("SCD")

    # Prepare paths
    _pig_script = os.path.join(os.path.dirname(__file__), 'scd_processing.pig')
    _scd_active_snapshot = '/tmp/scd.active/scd.active.csv'
    _scd_updates = os.path.join(os.path.dirname(__file__), 'resources',
                                'scd.update.csv')
    _hdfs_job_output = '/tmp/scd.updated'

    _local_folder_to_monitor = LocalFS(
        os.path.join(os.path.dirname(__file__), 'resources'))
    _hdfs_basedir = HDFS('/tmp/scd.active')
    _hdfs_tmpdir = HDFS('/tmp/scd.tmp')
    _hdfs_tmpdir.create_directory()

    if _scd_updates and LocalFS(_scd_updates).exists():

        # If a file recording the last failed step exists,
        # read that step and resume from it
        step = 'Copying scd updates to raw area on HDFS'
        if os.path.isfile('resources/step'):
            with open('resources/step', 'r') as step_file:
                step = step_file.read()

        flow = FlowRegistry.flow('Flow')