class url_source(source): """ A simple URL based source that parses HTML to find references to kernel files. """ _extension_pattern = re.compile(r'.*\.[^/.]+$') def __init__(self, database, prefix): super(url_source, self).__init__(database) self.prefix = prefix self.urls = [] def add_url(self, url, pattern): """ Add a URL path to a HTML document with links to kernel files. @param url: URL path to a HTML file with links to kernel files (can be either an absolute URL or one relative to self.prefix) @param pattern: regex pattern to filter kernel files links out of all othe links found in the HTML document """ # if it does not have an extension then it's a directory and it needs # a trailing '/'. NOTE: there are some false positives such as # directories named "v2.6" where ".6" will be assumed to be extension. # In order for these to work the caller must provide a trailing / if url[-1:] != '/' and not self._extension_pattern.match(url): url = url + '/' self.urls.append((url, re.compile(pattern))) @staticmethod def _get_item(url): """ Get a database.item object by fetching relevant HTTP information from the document pointed to by the given url. """ try: info = urllib2.urlopen(url).info() except IOError, err: # file is referenced but does not exist print 'WARNING: %s' % err return None size = info.get('content-length') if size: size = int(size) else: size = -1 timestamp = int(time.mktime(info.getdate('date'))) if not timestamp: timestamp = 0 return database.item(url, size, timestamp)
def _setup_merge_dictionary(self):
    """
    Common fixture for the merge_dictionary tests: stub out the db
    object and record the mock call sequence shared by both the
    success and the failure scenario.

    @return: tuple (stubbed db, dict of new files to merge, dict of
            the expected merged database contents).
    """
    # setup
    stubbed_db = database.dict_database(self._path)
    self.god.stub_function(stubbed_db, 'get_dictionary')
    self.god.stub_function(stubbed_db, '_aquire_lock')
    added = {
        'file3': database.item('file3', 30, 30000),
        'file4': database.item('file4', 40, 40000),
    }
    merged = dict(self._db_contents)
    merged.update(added)

    # record -- order matters: lock, read, create temp file, open it
    stubbed_db._aquire_lock.expect_call().and_return(3)
    stubbed_db.get_dictionary.expect_call().and_return(self._db_contents)
    mkstemp_call = database.tempfile.mkstemp.expect_call(
            prefix=self._path, dir='')
    mkstemp_call.and_return((4, 'tmpfile'))
    database.os.fdopen.expect_call(4, 'wb').and_return(self._file_instance)
    return stubbed_db, added, merged
def _parse_output(self, output, prefix):
    """
    Parse rsync's "ls -l" style output and return a dictionary of
    database.item indexed by the "name" field.

    @param output: multi-line listing, one entry per line in the form
            "-rw-r--r--    1234 2010/01/02 03:04:05 some/file";
            lines not matching that shape (directories, symlinks, etc)
            are skipped.
    @param prefix: path prefix joined with '/' in front of each parsed
            filename; falsy (empty/None) means use the name as-is.

    @return: dict mapping item.name -> database.item carrying the
            parsed size and mtime (as an integer unix timestamp).
    """
    # Raw string so the \d escapes reach the regex engine verbatim
    # (non-raw \d is a deprecated/invalid escape in modern Python).
    regex = re.compile(
        r'-[rwx-]{9} +(\d+) (\d{4}/\d\d/\d\d \d\d:\d\d:\d\d) (.*)')
    res = {}
    for line in output.splitlines():
        match = regex.match(line)
        if not match:
            continue
        size, mtime, fname = match.groups()
        timestamp = time.mktime(time.strptime(mtime, '%Y/%m/%d %H:%M:%S'))
        if prefix:
            fname = '%s/%s' % (prefix, fname)
        item = database.item(fname, int(size), int(timestamp))
        res[item.name] = item
    return res
def _get_item(url): """ Get a database.item object by fetching relevant HTTP information from the document pointed to by the given url. """ try: info = urllib2.urlopen(url).info() except IOError as err: # file is referenced but does not exist print 'WARNING: %s' % err return None size = info.get('content-length') if size: size = int(size) else: size = -1 timestamp = int(time.mktime(info.getdate('date'))) if not timestamp: timestamp = 0 return database.item(url, size, timestamp)
def get_new_files(self, _stat_func=os.stat):
    """
    Main function, see source.get_new_files().

    @param _stat_func: Used for unit testing, if we stub os.stat in the
            unit test then unit test failures get reported confusingly
            because the unit test framework tries to stat() the unit
            test file.
    """
    found = {}
    for name in os.listdir(self._path):
        path = os.path.join(self._path, name)
        try:
            stat_data = _stat_func(path)
        except OSError:
            # The entry vanished (removed/renamed) between listdir()
            # and stat(); just skip it.
            continue
        found[name] = database.item(path, stat_data.st_size,
                                    int(stat_data.st_mtime))
    return self._get_new_files(found)
class dict_database_unittest(unittest.TestCase):
    # Tests for database.dict_database: the pickle-backed dictionary load
    # and the locked read-merge-write update cycle, all run against mocks
    # so no real filesystem access happens.

    # path of the (mocked) database file used by every test
    _path = 'somepath.db'
    # canonical pre-existing database contents returned by the stubs
    _db_contents = {
        'file1': database.item('file1', 10, 10000),
        'file2': database.item('file2', 20, 20000),
    }

    def setUp(self):
        # Stub every pickle/tempfile/os entry point the database module
        # touches so the record/playback mock can verify the exact calls.
        self.god = mock.mock_god()
        self.god.stub_function(database.cPickle, 'load')
        self.god.stub_function(database.cPickle, 'dump')
        self.god.stub_function(database.tempfile, 'mkstemp')
        self.god.stub_function(database.os, 'fdopen')
        self.god.stub_function(database.os, 'close')
        self.god.stub_function(database.os, 'rename')
        self.god.stub_function(database.os, 'unlink')
        self._open_mock = self.god.create_mock_function('open')
        self._file_instance = self.god.create_mock_class(file, 'file')

    def tearDown(self):
        self.god.unstub_all()

    def test_get_dictionary_no_file(self):
        # A missing/unreadable database file must yield an empty dict.
        # record
        (self._open_mock.expect_call(self._path,
                                     'rb').and_raises(IOError('blah')))
        # playback
        db = database.dict_database(self._path)
        self.assertEqual(db.get_dictionary(_open_func=self._open_mock), {})
        self.god.check_playback()

    def test_get_dictionary(self):
        # Successful load: open, unpickle, close, return the contents.
        # record
        (self._open_mock.expect_call(self._path,
                                     'rb').and_return(self._file_instance))
        (database.cPickle.load.expect_call(self._file_instance).and_return(
            self._db_contents))
        self._file_instance.close.expect_call()
        # playback
        db = database.dict_database(self._path)
        self.assertEqual(db.get_dictionary(_open_func=self._open_mock),
                         self._db_contents)
        self.god.check_playback()

    def _setup_merge_dictionary(self):
        # Shared fixture for the merge_dictionary tests: stubs the db and
        # records the common lock/read/mkstemp/fdopen call sequence.
        # NOTE(review): '_aquire_lock' [sic] must match the method name
        # spelled that way in the database module -- do not "fix" it here.
        # setup
        db = database.dict_database(self._path)
        self.god.stub_function(db, 'get_dictionary')
        self.god.stub_function(db, '_aquire_lock')
        new_files = {
            'file3': database.item('file3', 30, 30000),
            'file4': database.item('file4', 40, 40000),
        }
        all_files = dict(self._db_contents)
        all_files.update(new_files)
        # record
        db._aquire_lock.expect_call().and_return(3)
        db.get_dictionary.expect_call().and_return(self._db_contents)
        (database.tempfile.mkstemp.expect_call(prefix=self._path,
                                               dir='').and_return(
            (4, 'tmpfile')))
        database.os.fdopen.expect_call(4, 'wb').and_return(self._file_instance)
        return db, new_files, all_files

    def test_merge_dictionary(self):
        # Happy path: merged contents are pickled to a temp file which is
        # renamed over the database, then the lock fd (3) is closed.
        db, new_files, all_files = self._setup_merge_dictionary()
        database.cPickle.dump.expect_call(
            all_files, self._file_instance,
            protocol=database.cPickle.HIGHEST_PROTOCOL)
        self._file_instance.close.expect_call()
        database.os.rename.expect_call('tmpfile', self._path)
        database.os.close.expect_call(3)
        # playback
        db.merge_dictionary(new_files)
        self.god.check_playback()

    def test_merge_dictionary_disk_full(self):
        # Failure path: when pickling raises (e.g. disk full) the temp
        # file is unlinked, the lock fd is still closed, and the
        # exception propagates to the caller.
        err = Exception('fail')
        db, new_files, all_files = self._setup_merge_dictionary()
        database.cPickle.dump.expect_call(
            all_files, self._file_instance,
            protocol=database.cPickle.HIGHEST_PROTOCOL).and_raises(err)
        self._file_instance.close.expect_call().and_raises(err)
        database.os.unlink.expect_call('tmpfile')
        database.os.close.expect_call(3)
        # playback
        self.assertRaises(Exception, db.merge_dictionary, new_files)
        self.god.check_playback()