Exemplo n.º 1
0
    def transfer_to_hdfs(self):
        """Copy any necessary input files to HDFS.

        The work happens in three stages, in this order:

        1. If ``self.share_exe_setup`` is true, the executable (only when
           ``self.copy_exe`` is true) and the setup script (only when
           ``self.setup_script`` is set) are copied to ``self.hdfs_store``.
        2. Every entry of ``self.common_input_file_mirrors`` is copied from
           its ``original`` location to its ``hdfs`` location.
        3. Each job in ``self.jobs`` is asked to transfer its own files via
           its ``transfer_to_hdfs()`` method.
        """
        # Do copying of exe/setup script here instead of through Jobs if only
        # 1 instance required on HDFS.
        if self.share_exe_setup:
            if self.copy_exe:
                log.info('Copying %s -->> %s', self.exe, self.hdfs_store)
                cp_hdfs(self.exe, self.hdfs_store)
            if self.setup_script:
                log.info('Copying %s -->> %s', self.setup_script,
                         self.hdfs_store)
                cp_hdfs(self.setup_script, self.hdfs_store)

        # Transfer common input files
        for ifile in self.common_input_file_mirrors:
            log.info('Copying %s -->> %s', ifile.original, ifile.hdfs)
            cp_hdfs(ifile.original, ifile.hdfs)

        # Get each job to transfer their necessary files.
        # ``values()`` is used rather than ``itervalues()`` because the latter
        # only exists on Python 2 dicts; ``values()`` works on both 2 and 3.
        for job in self.jobs.values():
            job.transfer_to_hdfs()
Exemplo n.º 2
0
    def transfer_to_hdfs(self):
        """Copy this object's input files to their HDFS locations.

        Files are taken from ``self.input_file_mirrors``. A file is skipped
        when its ``original`` and ``hdfs`` locations are equal, or when
        ``self.manager.share_exe_setup`` is true and the file is the manager's
        exe or setup script (the manager transfers those itself).

        ``check_dir_create`` is called on ``self.hdfs_mirror_dir`` only when
        at least one file is left to transfer.
        """
        def _needs_copy(mirror):
            # Nothing to do if the file already lives at its HDFS location.
            if mirror.original == mirror.hdfs:
                return False
            manager = self.manager
            if not manager.share_exe_setup:
                return True
            # Shared exe/setup script - the JobSet should handle this itself.
            return mirror.original not in [manager.exe, manager.setup_script]

        pending = [m for m in self.input_file_mirrors if _needs_copy(m)]
        if not pending:
            return

        check_dir_create(self.hdfs_mirror_dir)
        for mirror in pending:
            log.info('Copying %s -->> %s', mirror.original, mirror.hdfs)
            cp_hdfs(mirror.original, mirror.hdfs)
Exemplo n.º 3
0
    def transfer_to_hdfs(self):
        """Transfer the required input files across to HDFS.

        Walks ``self.input_file_mirrors`` and collects the files that need
        copying. Two kinds of file are left out:

        * files whose ``original`` location equals their ``hdfs`` location;
        * the manager's exe and setup script, when
          ``self.manager.share_exe_setup`` is true - the manager is
          responsible for transferring those.

        If anything remains, ``check_dir_create`` is called on
        ``self.hdfs_mirror_dir`` and each remaining file is copied.
        """
        to_copy = []
        for mirror in self.input_file_mirrors:
            if mirror.original == mirror.hdfs:
                continue
            if self.manager.share_exe_setup:
                # skip the exe/setup script - the JobSet handles these itself.
                shared = [self.manager.exe, self.manager.setup_script]
                if mirror.original in shared:
                    continue
            to_copy.append(mirror)

        if to_copy:
            check_dir_create(self.hdfs_mirror_dir)
            for mirror in to_copy:
                log.info('Copying %s -->> %s', mirror.original, mirror.hdfs)
                cp_hdfs(mirror.original, mirror.hdfs)
Exemplo n.º 4
0
    def transfer_to_hdfs(self):
        """Copy any necessary input files to HDFS.

        This transfers the common exe/setup script (only if
        ``self.share_exe_setup`` is true), then the common input files, and
        finally the individual files required by each Job.

        The exe is copied to ``self.hdfs_store`` only when ``self.copy_exe``
        is true. The setup script is copied there only when
        ``self.setup_script`` is set. Each common input file mirror is copied
        from its ``original`` location to its ``hdfs`` location.
        """
        # Do copying of exe/setup script here instead of through Jobs if only
        # 1 instance required on HDFS.
        if self.share_exe_setup:
            if self.copy_exe:
                log.info('Copying %s -->> %s', self.exe, self.hdfs_store)
                cp_hdfs(self.exe, self.hdfs_store)
            if self.setup_script:
                log.info('Copying %s -->> %s', self.setup_script, self.hdfs_store)
                cp_hdfs(self.setup_script, self.hdfs_store)

        # Transfer common input files
        for ifile in self.common_input_file_mirrors:
            log.info('Copying %s -->> %s', ifile.original, ifile.hdfs)
            cp_hdfs(ifile.original, ifile.hdfs)

        # Get each job to transfer their necessary files.
        # dict.itervalues() was removed in Python 3; values() is available on
        # both Python 2 and Python 3, so the loop works on either.
        for job in self.jobs.values():
            job.transfer_to_hdfs()