Пример #1
0
class SeedfileSet(MultiArmedBandit):
    '''
    A MultiArmedBandit whose items are SeedFile objects keyed by md5.

    Intended to be used as a context manager: entering the context
    copies each file found in originpath into localpath and outputpath
    (renamed to sf_<md5><ext>) and then registers every file found in
    localpath with the set.
    '''
    def __init__(self,
                 campaign_id=None,
                 originpath=None,
                 localpath=None,
                 outputpath='.',
                 logfile=None):
        '''
        Constructor

        campaign_id -- accepted but currently not stored or used
        originpath -- directory the original seed files are read from
        localpath -- directory the seed files are copied into and
            loaded from
        outputpath -- second copy destination; also handed to SeedFile
            as its output base directory
        logfile -- if given, a logging.FileHandler for this path is
            added to the module-level logger
        '''
        MultiArmedBandit.__init__(self)
        self.seedfile_output_base_dir = outputpath

        self.originpath = originpath
        self.localpath = localpath
        # TODO: merge self.outputpath with self.seedfile_output_base_dir
        self.outputpath = outputpath

        # Directory objects; populated by _set_directories()
        self.origindir = None
        self.localdir = None
        self.outputdir = None

        if logfile:
            # NOTE: the handler is attached to the shared module logger
            # and is never removed by this class.
            hdlr = logging.FileHandler(logfile)
            logger.addHandler(hdlr)

        logger.debug('SeedfileSet output_dir: %s',
                     self.seedfile_output_base_dir)

    def __enter__(self):
        '''Run _setup() and return the set itself.'''
        self._setup()
        return self

    def __exit__(self, etype, value, traceback):
        '''Nothing to release; exceptions are not suppressed.'''
        pass

    def _setup(self):
        '''Create the directories, copy the seed files, load the set.'''
        self._set_directories()
        self._copy_files_to_localdir()
        self._add_local_files_to_set()

    def _set_directories(self):
        '''
        Build a Directory object for every path that was supplied.
        Paths that are unset leave the matching *dir attribute as None.
        '''
        if self.originpath:
            self.origindir = Directory(self.originpath)
        if self.localpath:
            self.localdir = Directory(self.localpath, create=True)
        if self.outputpath:
            self.outputdir = Directory(self.outputpath, create=True)

    def _copy_files_to_localdir(self):
        '''Copy every file in the origin directory via copy_file_from_origin().'''
        if self.origindir is None:
            # no originpath was configured, so there is nothing to copy
            return
        for f in self.origindir:
            self.copy_file_from_origin(f)

    def _add_local_files_to_set(self):
        '''Refresh the local directory and add each of its files to the set.'''
        if self.localdir is None:
            # no localpath was configured, so there is nothing to load
            return
        self.localdir.refresh()
        files_to_add = [f.path for f in self.localdir]
        self.add_file(*files_to_add)

    def add_file(self, *files):
        '''
        Wrap each given path in a SeedFile and add it to the set, keyed
        by the SeedFile's md5. A path whose SeedFile construction raises
        SeedFileError is logged and skipped.
        '''
        for f in files:
            try:
                seedfile = SeedFile(self.seedfile_output_base_dir, f)
            except SeedFileError:
                logger.warning('Skipping empty file %s', f)
                continue
            logger.info('Adding file to set: %s', seedfile.path)
            self.add_item(seedfile.md5, seedfile)

    def remove_file(self, seedfile):
        '''Delete the item keyed by seedfile.md5 from the set.'''
        logger.info('Removing file from set: %s', seedfile.basename)
        self.del_item(seedfile.md5)

    def copy_file_from_origin(self, f):
        '''
        Copy the origin file f into localpath and outputpath under the
        name sf_<md5><ext>, then make each copy writable.

        Returns 0 if f is a .DS_Store file (which is skipped), otherwise
        None.
        '''
        if os.path.basename(f.path) == '.DS_Store':
            return 0

        # rename <foo><ext> to sf_<md5><ext>
        basename = 'sf_' + f.md5 + f.ext
        # only copy into the directories that were actually configured
        targets = [
            os.path.join(d, basename)
            for d in (self.localpath, self.outputpath)
            if d
        ]
        if not targets:
            return None

        filetools.copy_file(f.path, *targets)
        for target in targets:
            filetools.make_writable(target)

    def paths(self):
        '''Yield the path of every seedfile currently in the set.'''
        for x in self.things.values():
            yield x.path

    def next_item(self):
        '''
        Returns a seedfile object selected by MultiArmedBandit.next().
        Verifies that the seedfile exists, and removes any nonexistent
        seedfiles from the set.

        Raises SeedfileSetError if the set is empty, either on entry or
        because every remaining seedfile turned out to be missing.
        '''
        # continue until we find one that exists, or else the set is empty
        while self.things:
            logger.debug('Thing count: %d', len(self.things))
            sf = MultiArmedBandit.next(self)
            if sf.exists():
                # it's still there, proceed
                return sf

            # it doesn't exist, remove it from the set
            logger.warning(
                'Seedfile no longer exists, removing from set: %s',
                sf.path)
            self.del_item(sf.md5)

        # nothing (left) to choose from; never hand None back to the caller
        raise SeedfileSetError
Пример #2
0
class Test(unittest.TestCase):
    '''
    Tests for Directory: directory verification, refresh after new
    files appear, and handling of symlinked paths.
    '''

    @staticmethod
    def _touch(path, content=''):
        '''Create (or truncate) path with content and close it right away.'''
        with open(path, 'w') as handle:
            handle.write(content)

    def setUp(self):
        '''Create a temp dir holding three empty files and wrap it in a Directory.'''
        self.path = tempfile.mkdtemp()
        self.assertTrue(os.path.isdir(self.path))
        # drop some files in the dir
        self.files = [os.path.join(self.path, filename) for filename in ('a', 'b', 'c')]
        for f in self.files:
            self._touch(f)
        self.directory = Directory(self.path)

    def tearDown(self):
        '''Remove the temp dir if the test left it in place.'''
        if os.path.isdir(self.path):
            shutil.rmtree(self.path)
        self.assertFalse(os.path.isdir(self.path))
        self.assertFalse(os.path.exists(self.path))

    def test_verify_dir(self):
        '''_verify_dir must raise for a missing path and for a non-directory.'''
        self.assertTrue(os.path.exists(self.path))
        self.assertTrue(os.path.isdir(self.path))
        # verify should fail if the dir doesn't exist
        shutil.rmtree(self.path)
        self.assertRaises(DirectoryError, self.directory._verify_dir)

        # verify should fail if the path is not a dir
        self._touch(self.path)
        self.assertTrue(os.path.exists(self.path))
        self.assertFalse(os.path.isdir(self.path))
        self.assertRaises(DirectoryError, self.directory._verify_dir)

        # clean up
        os.remove(self.path)
        self.assertFalse(os.path.exists(self.path))

    def test_refresh(self):
        '''A file created after construction only shows up after refresh().'''
        # make sure we got the files we created in setup
        for f in self.files:
            self.assertIn(f, self.directory.paths())

        # create a new file, then test to see if it shows up in a refresh
        newfile = os.path.join(self.path, 'x')
        self._touch(newfile, 'AAAA')

        self.assertNotIn(newfile, self.directory.paths())
        self.directory.refresh()
        self.assertIn(newfile, self.directory.paths())

    def test_symlinked_dir(self):
        '''Directory must follow symlinks and reject bad or non-dir targets.'''
        # Work inside a private scratch dir so the names below are known
        # to be unused, without relying on the race-prone tempfile.mktemp();
        # the cleanup also runs when an assertion fails part-way through.
        scratch = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, scratch, True)

        # dir is symlink, link target exists but is not dir
        target_file = os.path.join(scratch, 'target_file')
        self.assertFalse(os.path.exists(target_file))
        self._touch(target_file)
        self.assertTrue(os.path.exists(target_file))
        self.assertTrue(os.path.isfile(target_file))

        link_name = os.path.join(scratch, 'link_to_file')
        self.assertFalse(os.path.exists(link_name))
        os.symlink(target_file, link_name)
        self.assertTrue(os.path.exists(link_name))
        self.assertTrue(os.path.islink(link_name))
        self.assertTrue(os.path.isfile(link_name))

        self.assertRaises(DirectoryError, Directory, link_name)
        os.remove(link_name)
        os.remove(target_file)

        # dir is symlink, link target is dir
        target_dir = tempfile.mkdtemp(dir=scratch)
        self.assertTrue(os.path.isdir(target_dir))
        link_name = os.path.join(scratch, 'link_to_dir')
        self.assertFalse(os.path.exists(link_name))
        os.symlink(target_dir, link_name)
        self.assertTrue(os.path.exists(link_name))
        self.assertTrue(os.path.islink(link_name))
        self.assertTrue(os.path.isdir(link_name))

        d = Directory(link_name)
        self.assertEqual(link_name, d.dir)

        # remove the target dir - now we have a bad link
        os.rmdir(target_dir)
        self.assertFalse(os.path.exists(target_dir))

        # dir is symlink, link target does not exist
        self.assertTrue(os.path.islink(link_name))
        self.assertFalse(os.path.exists(os.readlink(link_name)))
        self.assertRaises(DirectoryError, Directory, link_name, True)

        os.remove(link_name)
        self.assertFalse(os.path.exists(link_name))
class SeedfileSet(MultiArmedBandit):
    '''
    MultiArmedBandit specialisation that holds SeedFile objects, each
    stored under its md5.

    Meant to be used in a with-statement: on entry the files in
    originpath are copied to localpath and outputpath as
    sf_<md5><ext>, and every file then present in localpath is added
    to the set.
    '''

    def __init__(self, campaign_id=None, originpath=None, localpath=None,
                 outputpath='.', logfile=None):
        '''
        Constructor

        campaign_id -- accepted but currently not stored or used
        originpath -- directory the original seed files are read from
        localpath -- directory the seed files are copied into and
            loaded from
        outputpath -- second copy destination; also handed to SeedFile
            as its output base directory
        logfile -- if given, a logging.FileHandler for this path is
            added to the module-level logger
        '''
        MultiArmedBandit.__init__(self)
        self.seedfile_output_base_dir = outputpath

        self.originpath = originpath
        self.localpath = localpath
        # TODO: merge self.outputpath with self.seedfile_output_base_dir
        self.outputpath = outputpath

        # Directory objects; populated by _set_directories()
        self.origindir = None
        self.localdir = None
        self.outputdir = None

        if logfile:
            # NOTE: the handler is attached to the shared module logger
            # and is never removed by this class.
            hdlr = logging.FileHandler(logfile)
            logger.addHandler(hdlr)

        logger.debug(
            'SeedfileSet output_dir: %s', self.seedfile_output_base_dir)

    def __enter__(self):
        '''Run _setup() and return the set itself.'''
        self._setup()
        return self

    def __exit__(self, etype, value, traceback):
        '''Nothing to release; exceptions are not suppressed.'''
        pass

    def _setup(self):
        '''Create the directories, copy the seed files, load the set.'''
        self._set_directories()
        self._copy_files_to_localdir()
        self._add_local_files_to_set()

    def _set_directories(self):
        '''
        Build a Directory object for every path that was supplied.
        Paths that are unset leave the matching *dir attribute as None.
        '''
        if self.originpath:
            self.origindir = Directory(self.originpath)
        if self.localpath:
            self.localdir = Directory(self.localpath, create=True)
        if self.outputpath:
            self.outputdir = Directory(self.outputpath, create=True)

    def _copy_files_to_localdir(self):
        '''Copy every file in the origin directory via copy_file_from_origin().'''
        if self.origindir is None:
            # no originpath was configured, so there is nothing to copy
            return
        for f in self.origindir:
            self.copy_file_from_origin(f)

    def _add_local_files_to_set(self):
        '''Refresh the local directory and add each of its files to the set.'''
        if self.localdir is None:
            # no localpath was configured, so there is nothing to load
            return
        self.localdir.refresh()
        files_to_add = [f.path for f in self.localdir]
        self.add_file(*files_to_add)

    def add_file(self, *files):
        '''
        Wrap each given path in a SeedFile and add it to the set, keyed
        by the SeedFile's md5. A path whose SeedFile construction raises
        SeedFileError is logged and skipped.
        '''
        for f in files:
            try:
                seedfile = SeedFile(self.seedfile_output_base_dir, f)
            except SeedFileError:
                logger.warning('Skipping empty file %s', f)
                continue
            logger.info('Adding file to set: %s', seedfile.path)
            self.add_item(seedfile.md5, seedfile)

    def remove_file(self, seedfile):
        '''Delete the item keyed by seedfile.md5 from the set.'''
        logger.info('Removing file from set: %s', seedfile.basename)
        self.del_item(seedfile.md5)

    def copy_file_from_origin(self, f):
        '''
        Copy the origin file f into localpath and outputpath under the
        name sf_<md5><ext>, then make each copy writable.

        Returns 0 if f is a .DS_Store file (which is skipped), otherwise
        None.
        '''
        if os.path.basename(f.path) == '.DS_Store':
            return 0

        # rename <foo><ext> to sf_<md5><ext>
        basename = 'sf_' + f.md5 + f.ext
        # only copy into the directories that were actually configured
        targets = [os.path.join(d, basename)
                   for d in (self.localpath, self.outputpath) if d]
        if not targets:
            return None

        filetools.copy_file(f.path, *targets)
        for target in targets:
            filetools.make_writable(target)

    def paths(self):
        '''Yield the path of every seedfile currently in the set.'''
        for x in self.things.values():
            yield x.path

    def next_item(self):
        '''
        Returns a seedfile object selected by MultiArmedBandit.next().
        Verifies that the seedfile exists, and removes any nonexistent
        seedfiles from the set.

        Raises SeedfileSetError if the set is empty, either on entry or
        because every remaining seedfile turned out to be missing.
        '''
        # continue until we find one that exists, or else the set is empty
        while self.things:
            logger.debug('Thing count: %d', len(self.things))
            sf = MultiArmedBandit.next(self)
            if sf.exists():
                # it's still there, proceed
                return sf

            # it doesn't exist, remove it from the set
            logger.warning(
                'Seedfile no longer exists, removing from set: %s', sf.path)
            self.del_item(sf.md5)

        # nothing (left) to choose from; never hand None back to the caller
        raise SeedfileSetError