class SeedfileSet(MultiArmedBandit):
    '''
    A MultiArmedBandit whose items are SeedFile objects keyed by their md5.

    Intended to be used as a context manager: entering the context builds
    the origin/local/output Directory objects, copies every file found in
    the origin directory into the local and output paths under an
    md5-based name, and then adds whatever is in the local directory to
    the set.
    '''

    def __init__(self, campaign_id=None, originpath=None, localpath=None,
                 outputpath='.', logfile=None):
        '''
        Record the paths the set works with; no filesystem access happens
        here other than opening the optional log file.

        campaign_id -- accepted but currently unused (the assignment below
                       is commented out)
        originpath  -- directory the seed files are copied from
        localpath   -- directory the working copies are placed in
        outputpath  -- directory that receives a second copy of each seed
                       file; also handed to every SeedFile as its output
                       base dir
        logfile     -- if given, a FileHandler for this path is attached to
                       the module logger
        '''
        MultiArmedBandit.__init__(self)
        # self.campaign_id = campaign_id
        self.seedfile_output_base_dir = outputpath

        self.originpath = originpath
        self.localpath = localpath
        # TODO: merge self.outputpath with self.seedfile_output_base_dir
        self.outputpath = outputpath

        # Directory objects are created lazily by _set_directories()
        self.origindir = None
        self.localdir = None
        self.outputdir = None

        if logfile:
            # NOTE: nothing in this class detaches the handler again, so
            # each instance created with a logfile adds one more handler.
            hdlr = logging.FileHandler(logfile)
            logger.addHandler(hdlr)

        logger.debug('SeedfileSet output_dir: %s',
                     self.seedfile_output_base_dir)

    def __enter__(self):
        '''
        Run the full setup (directories, copy, populate) and return self.
        '''
        self._setup()
        return self

    def __exit__(self, etype, value, traceback):
        '''
        Nothing to clean up; exceptions raised in the with-body propagate.
        '''
        pass

    def _setup(self):
        '''
        Prepare the directories, copy the origin files and load the set.
        The order matters: files must be copied before the local directory
        is scanned.
        '''
        self._set_directories()
        self._copy_files_to_localdir()
        self._add_local_files_to_set()

    def _set_directories(self):
        '''
        Create a Directory object for each path that was supplied. The
        local and output directories are requested with create=True; the
        origin directory is not.
        '''
        if self.originpath:
            self.origindir = Directory(self.originpath)
        if self.localpath:
            self.localdir = Directory(self.localpath, create=True)
        if self.outputpath:
            self.outputdir = Directory(self.outputpath, create=True)

    def _copy_files_to_localdir(self):
        '''
        Copy every entry of the origin directory via
        copy_file_from_origin(). Requires that origindir has been set.
        '''
        for f in self.origindir:
            self.copy_file_from_origin(f)

    def _add_local_files_to_set(self):
        '''
        Refresh the local directory listing and add each file in it to
        the set. Requires that localdir has been set.
        '''
        self.localdir.refresh()
        files_to_add = [f.path for f in self.localdir]
        self.add_file(*files_to_add)

    def add_file(self, *files):
        '''
        Wrap each given path in a SeedFile and add it to the set under its
        md5. Paths for which SeedFile raises SeedFileError are logged as
        empty files and skipped.
        '''
        for f in files:
            try:
                seedfile = SeedFile(self.seedfile_output_base_dir, f)
            except SeedFileError:
                logger.warning('Skipping empty file %s', f)
                continue
            logger.info('Adding file to set: %s', seedfile.path)
            self.add_item(seedfile.md5, seedfile)

    def remove_file(self, seedfile):
        '''
        Remove the given seedfile from the set, looked up by its md5.
        '''
        logger.info('Removing file from set: %s', seedfile.basename)
        self.del_item(seedfile.md5)

    def copy_file_from_origin(self, f):
        '''
        Copy one origin file into both the local and the output path,
        then make both copies writable.

        f -- an object exposing .path, .md5 and .ext

        Returns 0 without copying anything for '.DS_Store' files,
        otherwise None.
        '''
        if os.path.basename(f.path) == '.DS_Store':
            return 0

        # the copies are named sf_<md5><ext> rather than after the
        # original file
        basename = 'sf_' + f.md5 + f.ext
        targets = [os.path.join(d, basename)
                   for d in (self.localpath, self.outputpath)]
        filetools.copy_file(f.path, *targets)
        for target in targets:
            filetools.make_writable(target)

    def paths(self):
        '''
        Generate the path of every seedfile currently in the set.
        '''
        for x in self.things.values():
            yield x.path

    def next_item(self):
        '''
        Returns a seedfile object selected per the scorable_set object.
        Verifies that the seedfile exists, and removes any nonexistent
        seedfiles from the set.

        Raises SeedfileSetError when no seedfile can be returned, whether
        the set was empty on entry or became empty because every remaining
        seedfile had disappeared.
        '''
        while self.things:
            logger.debug('Thing count: %d', len(self.things))
            # continue until we find one that exists, or else the set is
            # empty
            sf = MultiArmedBandit.next(self)
            if sf.exists():
                # it's still there, proceed
                return sf
            # it doesn't exist, remove it from the set
            logger.warning(
                'Seedfile no longer exists, removing from set: %s', sf.path)
            self.del_item(sf.md5)

        # Either nothing was in the set to begin with or every candidate
        # was pruned above. Raise in both cases instead of silently
        # returning None for the second one.
        raise SeedfileSetError
class Test(unittest.TestCase):
    '''
    Tests for Directory: path verification, refresh after new files
    appear, and construction from symlinked paths.
    '''

    def setUp(self):
        '''
        Create a temp dir holding three empty files and wrap it in a
        Directory.
        '''
        self.path = tempfile.mkdtemp()
        self.assertTrue(os.path.isdir(self.path))
        # drop some files in the dir
        self.files = [os.path.join(self.path, filename)
                      for filename in ('a', 'b', 'c')]
        for f in self.files:
            # close the handle immediately instead of leaving it to the
            # garbage collector
            with open(f, 'w'):
                pass
        self.directory = Directory(self.path)

    def tearDown(self):
        '''
        Remove the temp dir if the test left it in place.
        '''
        if os.path.isdir(self.path):
            shutil.rmtree(self.path)
        self.assertFalse(os.path.isdir(self.path))
        self.assertFalse(os.path.exists(self.path))

    def test_verify_dir(self):
        '''
        _verify_dir must raise DirectoryError when the path is missing and
        when the path exists but is a regular file.
        '''
        self.assertTrue(os.path.exists(self.path))
        self.assertTrue(os.path.isdir(self.path))

        # verify should fail if the dir doesn't exist
        shutil.rmtree(self.path)
        self.assertRaises(DirectoryError, self.directory._verify_dir)

        # verify should fail if the path is not a dir
        with open(self.path, 'w'):
            pass
        self.assertTrue(os.path.exists(self.path))
        self.assertFalse(os.path.isdir(self.path))
        self.assertRaises(DirectoryError, self.directory._verify_dir)

        # clean up (the handle above is closed, so removal cannot be
        # blocked by an open file)
        os.remove(self.path)
        self.assertFalse(os.path.exists(self.path))

    def test_refresh(self):
        '''
        A file created after construction only shows up in paths() once
        refresh() has been called.
        '''
        # make sure we got the files we created in setup
        for f in self.files:
            self.assertTrue(f in self.directory.paths())

        # create a new file, then test to see if it shows up in a refresh
        newfile = os.path.join(self.path, 'x')
        with open(newfile, 'w') as fp:
            # closing guarantees the data is flushed before the refresh
            fp.write('AAAA')
        self.assertFalse(newfile in self.directory.paths())
        self.directory.refresh()
        self.assertTrue(newfile in self.directory.paths())

    def test_symlinked_dir(self):
        '''
        Directory must reject a symlink to a file, accept a symlink to a
        dir, and reject a dangling symlink.
        '''
        # dir is symlink, link target exists but is not dir
        # NOTE: tempfile.mktemp() only yields a name and is race-prone; it
        # is kept here because the test needs paths that do not exist yet.
        target_file = tempfile.mktemp()
        self.assertFalse(os.path.exists(target_file))
        with open(target_file, 'w'):
            pass
        self.assertTrue(os.path.exists(target_file))
        self.assertTrue(os.path.isfile(target_file))

        link_name = tempfile.mktemp()
        self.assertFalse(os.path.exists(link_name))
        os.symlink(target_file, link_name)
        self.assertTrue(os.path.exists(link_name))
        self.assertTrue(os.path.islink(link_name))
        self.assertTrue(os.path.isfile(link_name))

        self.assertRaises(DirectoryError, Directory, link_name)
        os.remove(link_name)
        os.remove(target_file)

        # dir is symlink, link target is dir
        target_dir = tempfile.mkdtemp()
        self.assertTrue(os.path.isdir(target_dir))

        link_name = tempfile.mktemp()
        self.assertFalse(os.path.exists(link_name))
        os.symlink(target_dir, link_name)
        self.assertTrue(os.path.exists(link_name))
        self.assertTrue(os.path.islink(link_name))
        self.assertTrue(os.path.isdir(link_name))

        d = Directory(link_name)
        self.assertEqual(link_name, d.dir)

        # remove the target dir - now we have a bad link
        os.rmdir(target_dir)
        self.assertFalse(os.path.exists(target_dir))

        # dir is symlink, link target does not exist
        self.assertTrue(os.path.islink(link_name))
        self.assertFalse(os.path.exists(os.readlink(link_name)))
        self.assertRaises(DirectoryError, Directory, link_name, True)

        os.remove(link_name)
        self.assertFalse(os.path.exists(link_name))
class SeedfileSet(MultiArmedBandit):
    '''
    A MultiArmedBandit whose items are SeedFile objects keyed by their md5.

    Intended to be used as a context manager: entering the context builds
    the origin/local/output Directory objects, copies every file found in
    the origin directory into the local and output paths under an
    md5-based name, and then adds whatever is in the local directory to
    the set.
    '''

    def __init__(self, campaign_id=None, originpath=None, localpath=None,
                 outputpath='.', logfile=None):
        '''
        Record the paths the set works with; no filesystem access happens
        here other than opening the optional log file.

        campaign_id -- accepted but currently unused (the assignment below
                       is commented out)
        originpath  -- directory the seed files are copied from
        localpath   -- directory the working copies are placed in
        outputpath  -- directory that receives a second copy of each seed
                       file; also handed to every SeedFile as its output
                       base dir
        logfile     -- if given, a FileHandler for this path is attached to
                       the module logger
        '''
        MultiArmedBandit.__init__(self)
        # self.campaign_id = campaign_id
        self.seedfile_output_base_dir = outputpath

        self.originpath = originpath
        self.localpath = localpath
        # TODO: merge self.outputpath with self.seedfile_output_base_dir
        self.outputpath = outputpath

        # Directory objects are created lazily by _set_directories()
        self.origindir = None
        self.localdir = None
        self.outputdir = None

        if logfile:
            # NOTE: nothing in this class detaches the handler again, so
            # each instance created with a logfile adds one more handler.
            hdlr = logging.FileHandler(logfile)
            logger.addHandler(hdlr)

        logger.debug(
            'SeedfileSet output_dir: %s', self.seedfile_output_base_dir)

    def __enter__(self):
        '''
        Run the full setup (directories, copy, populate) and return self.
        '''
        self._setup()
        return self

    def __exit__(self, etype, value, traceback):
        '''
        Nothing to clean up; exceptions raised in the with-body propagate.
        '''
        pass

    def _setup(self):
        '''
        Prepare the directories, copy the origin files and load the set.
        The order matters: files must be copied before the local directory
        is scanned.
        '''
        self._set_directories()
        self._copy_files_to_localdir()
        self._add_local_files_to_set()

    def _set_directories(self):
        '''
        Create a Directory object for each path that was supplied. The
        local and output directories are requested with create=True; the
        origin directory is not.
        '''
        if self.originpath:
            self.origindir = Directory(self.originpath)
        if self.localpath:
            self.localdir = Directory(self.localpath, create=True)
        if self.outputpath:
            self.outputdir = Directory(self.outputpath, create=True)

    def _copy_files_to_localdir(self):
        '''
        Copy every entry of the origin directory via
        copy_file_from_origin(). Requires that origindir has been set.
        '''
        for f in self.origindir:
            self.copy_file_from_origin(f)

    def _add_local_files_to_set(self):
        '''
        Refresh the local directory listing and add each file in it to
        the set. Requires that localdir has been set.
        '''
        self.localdir.refresh()
        files_to_add = [f.path for f in self.localdir]
        self.add_file(*files_to_add)

    def add_file(self, *files):
        '''
        Wrap each given path in a SeedFile and add it to the set under its
        md5. Paths for which SeedFile raises SeedFileError are logged as
        empty files and skipped.
        '''
        for f in files:
            try:
                seedfile = SeedFile(self.seedfile_output_base_dir, f)
            except SeedFileError:
                logger.warning('Skipping empty file %s', f)
                continue
            logger.info('Adding file to set: %s', seedfile.path)
            self.add_item(seedfile.md5, seedfile)

    def remove_file(self, seedfile):
        '''
        Remove the given seedfile from the set, looked up by its md5.
        '''
        logger.info('Removing file from set: %s', seedfile.basename)
        self.del_item(seedfile.md5)

    def copy_file_from_origin(self, f):
        '''
        Copy one origin file into both the local and the output path,
        then make both copies writable.

        f -- an object exposing .path, .md5 and .ext

        Returns 0 without copying anything for '.DS_Store' files,
        otherwise None.
        '''
        if os.path.basename(f.path) == '.DS_Store':
            return 0

        # the copies are named sf_<md5><ext> rather than after the
        # original file
        basename = 'sf_' + f.md5 + f.ext
        targets = [os.path.join(d, basename)
                   for d in (self.localpath, self.outputpath)]
        filetools.copy_file(f.path, *targets)
        for target in targets:
            filetools.make_writable(target)

    def paths(self):
        '''
        Generate the path of every seedfile currently in the set.
        '''
        for x in self.things.values():
            yield x.path

    def next_item(self):
        '''
        Returns a seedfile object selected per the scorable_set object.
        Verifies that the seedfile exists, and removes any nonexistent
        seedfiles from the set.

        Raises SeedfileSetError when no seedfile can be returned, whether
        the set was empty on entry or became empty because every remaining
        seedfile had disappeared.
        '''
        while self.things:
            logger.debug('Thing count: %d', len(self.things))
            # continue until we find one that exists, or else the set is
            # empty
            sf = MultiArmedBandit.next(self)
            if sf.exists():
                # it's still there, proceed
                return sf
            # it doesn't exist, remove it from the set
            logger.warning(
                'Seedfile no longer exists, removing from set: %s', sf.path)
            self.del_item(sf.md5)

        # Either nothing was in the set to begin with or every candidate
        # was pruned above. Raise in both cases instead of silently
        # returning None for the second one.
        raise SeedfileSetError