def getRunningStatesForDomain(self, domainId):
    """Return the running states of the repositories in a domain, latest change first.

    Every repository id reported by self.call for every repository group of
    `domainId` is looked up as '<repoId>.running' (filename-escaped) below
    self._statePath/<domainId>. Repositories without such a file are skipped.
    Each loaded JSON document is combined with {'repositoryId': repoId} through
    mergeDicts, and the result is sorted on 'changedate' in descending order.
    """
    states = []
    for groupId in self.call.getRepositoryGroupIds(domainId=domainId):
        for repoId in self.call.getRepositoryIds(domainId=domainId, repositoryGroupId=groupId):
            filepath = join(self._statePath, domainId, escapeFilename("%s.running" % repoId))
            if not isfile(filepath):
                continue
            # Close each state file right after parsing instead of leaving the
            # handle to the garbage collector.
            with open(filepath) as fp:
                states.append(mergeDicts(jsonLoad(fp), {'repositoryId': repoId}))
    return sorted(states, key=lambda d: d['changedate'], reverse=True)
def read(directory):
    """Build a Repository from the JSON configuration found in `directory`.

    The file CONFIG_FILENAME inside `directory` must provide the keys 'name',
    'baseUrl', 'metadataPrefix', 'setSpec', 'active' and 'apiKey'. After
    construction the repository's readState() is called before it is returned.
    """
    # Read the configuration inside a with-block so the handle is closed
    # deterministically, also when the JSON cannot be parsed.
    with open(join(directory, CONFIG_FILENAME)) as fp:
        jsonData = jsonLoad(fp)
    repository = Repository(
        name=jsonData['name'],
        baseUrl=jsonData['baseUrl'],
        metadataPrefix=jsonData['metadataPrefix'],
        setSpec=jsonData['setSpec'],
        active=jsonData['active'],
        apiKey=jsonData['apiKey'],
        directory=directory
    )
    repository.readState()
    return repository
def testDownloadError(self):
    # An IOError raised by the 'downloadBatch' observer method must propagate out
    # of harvest(); afterwards the persisted state carries 'error': True and the
    # 'last_error' file contains the exception message.
    def downloadBatchRaises(resumptionAttributes):
        raise IOError('help!')
    self.observer.methods['downloadBatch'] = downloadBatchRaises

    # Fails the test when harvest() returns normally; any other exception type
    # still propagates as an error.
    self.assertRaises(IOError, self.harvester.harvest)

    with open(join(self.tempdir, 'state')) as fp:
        persistedState = jsonLoad(fp)
    self.assertEqual({
            'harvestingReady': False,
            'datetime': '1976-11-08T12:34:56Z',
            'resumptionAttributes': None,
            'error': True},
        persistedState)
    with open(join(self.tempdir, 'last_error')) as fp:
        lastError = fp.read()
    self.assertIn('help!', lastError, lastError)
def _readState(self):
    """Restore self.token and self.from_ from the files on disk.

    When the resumption file exists it is authoritative: 'resumptionToken' and
    'from' are read from it, with empty or missing values turned into None.

    Otherwise, when the stats file exists, its non-error log lines are replayed:
    the token is taken from every line, the start date only while no token is
    set yet. If the last such line is a deletion (according to self._isDeleted),
    both values are reset to None. When neither file exists nothing is changed.
    """
    if isfile(self._resumptionFilename):
        with open(self._resumptionFilename) as fp:
            values = jsonLoad(fp)
        self.token = values.get('resumptionToken', None) or None
        self.from_ = values.get('from', '') or None
        return

    # The mechanism below will only be carried out once in case the resumption file does not yet exist.
    if isfile(self._statsfilename):
        self._statsfile = open(self._statsfilename)
        try:
            logline = None
            for logline in self._filterNonErrorLogLine(self._statsfile):
                if not self.token:
                    self.from_ = getStartDate(logline)
                self.token = getResumptionToken(logline)
            if logline and self._isDeleted(logline):
                self.from_ = None
                self.token = None
        finally:
            # Also release the handle when parsing a log line raises.
            self._statsfile.close()
def testConvertError(self):
    # A RuntimeError raised by the 'convert' observer method must propagate out of
    # harvest(); afterwards the persisted state carries 'error': True and the
    # 'last_error' file contains the exception message.
    batch = Batch()
    batch.records = [Record('id0', 'data0'), Record('id1', 'data1')]
    batch.harvestingReady = True
    self.observer.methods['downloadBatch'] = lambda **kwargs: batch

    def convertRaises(record):
        raise RuntimeError('help!')
    self.observer.methods['convert'] = convertRaises

    # Fails the test when harvest() returns normally; any other exception type
    # still propagates as an error.
    self.assertRaises(RuntimeError, self.harvester.harvest)

    with open(join(self.tempdir, 'state')) as fp:
        persistedState = jsonLoad(fp)
    self.assertEqual({
            'harvestingReady': False,
            'datetime': '1976-11-08T12:34:56Z',
            'resumptionAttributes': None,
            'error': True},
        persistedState)
    with open(join(self.tempdir, 'last_error')) as fp:
        lastError = fp.read()
    self.assertIn('help!', lastError, lastError)
def __init__(self, stateDir, port, configFile):
    """Load the core configuration and bring `stateDir` in line with it.

    `configFile` is a JSON file mapping core names to a dict of features
    (an empty dict for a core without additional features).

    When `stateDir` does not exist yet it is created as a copy of the
    'solr-data' tree below usrShareDir. Otherwise the luceneMatchVersion of
    every existing core is compared with the one in the shipped core
    configuration, and the 'lib' directory is replaced by the shipped one.

    Raises ValueError when an existing core has a different luceneMatchVersion.
    """
    def luceneMatchVersion(solrconfigPath):
        # Parse inside a with-block so the handle is closed as soon as the
        # version has been extracted.
        with open(solrconfigPath) as fp:
            return parse(fp).xpath("//luceneMatchVersion/text()")[0]

    with open(configFile) as fp:
        self.config = jsonLoad(fp)
    self.stateDir = stateDir
    self.port = port
    self.configBasedir = dirname(abspath(configFile))
    assert all(isinstance(v, dict) for v in self.config.values()), \
        "Core feature descriptions must be a dictionary (empty for no additional features)."

    if not isdir(self.stateDir):
        copytree(join(usrShareDir, 'solr-data'), self.stateDir)
    else:
        newMatchVersion = luceneMatchVersion(join(usrShareDir, 'core-data', 'conf', 'solrconfig.xml'))
        for coreDir in listdir(join(self.stateDir, 'cores')):
            currentMatchVersion = luceneMatchVersion(
                join(self.stateDir, 'cores', coreDir, 'conf', 'solrconfig.xml'))
            if currentMatchVersion != newMatchVersion:
                raise ValueError(
                    "LuceneMatchVersion in core '%s' does not match the new configuration. Remove the old index." % coreDir)
        # Only reached when every core matched: refresh the libraries.
        rmtree(join(self.stateDir, 'lib'))
        copytree(join(usrShareDir, 'solr-data', 'lib'), join(self.stateDir, 'lib'))

    self._setupJettyXml()
    self._setupStartConfig()
    self._setupSolrXml()
    self._setupCoreData()
    self._setFeatureDefaults('autoCommit', {'autoCommitMaxTime': 1000})
    for core, features in self.config.items():
        for feature, options in features.items():
            self._setupFeature(name=feature, core=core, options=options)
def __init__(self, stateDir, port, configFile):
    """Load the core configuration and bring `stateDir` in line with it.

    `configFile` is a JSON file mapping core names to a dict of features
    (an empty dict for a core without additional features).

    When `stateDir` does not exist yet it is created as a copy of the
    "solr-data" tree below usrShareDir. Otherwise the luceneMatchVersion of
    every existing core is compared with the one in the shipped core
    configuration, and the "lib" directory is replaced by the shipped one.

    Raises ValueError when an existing core has a different luceneMatchVersion.
    """
    def readMatchVersion(solrconfigPath):
        # Parse inside a with-block so the handle is closed as soon as the
        # version has been extracted.
        with open(solrconfigPath) as fp:
            return parse(fp).xpath("//luceneMatchVersion/text()")[0]

    with open(configFile) as fp:
        self.config = jsonLoad(fp)
    self.stateDir = stateDir
    self.port = port
    self.configBasedir = dirname(abspath(configFile))
    assert all(
        isinstance(v, dict) for v in self.config.values()
    ), "Core feature descriptions must be a dictionary (empty for no additional features)."

    if not isdir(self.stateDir):
        copytree(join(usrShareDir, "solr-data"), self.stateDir)
    else:
        newMatchVersion = readMatchVersion(join(usrShareDir, "core-data", "conf", "solrconfig.xml"))
        for coreDir in listdir(join(self.stateDir, "cores")):
            currentMatchVersion = readMatchVersion(
                join(self.stateDir, "cores", coreDir, "conf", "solrconfig.xml")
            )
            if currentMatchVersion != newMatchVersion:
                raise ValueError(
                    "LuceneMatchVersion in core '%s' does not match the new configuration. Remove the old index."
                    % coreDir
                )
        # Only reached when every core matched: refresh the libraries.
        rmtree(join(self.stateDir, "lib"))
        copytree(join(usrShareDir, "solr-data", "lib"), join(self.stateDir, "lib"))

    self._setupJettyXml()
    self._setupStartConfig()
    self._setupSolrXml()
    self._setupCoreData()
    self._setFeatureDefaults("autoCommit", {"autoCommitMaxTime": 1000})
    for core, features in self.config.items():
        for feature, options in features.items():
            self._setupFeature(name=feature, core=core, options=options)
def testMarkExceptionChange(self):
    # Three failing runs in a row: a repeated, identical error is expected to keep
    # the changedate of its first occurrence in 'repo.running', while an error
    # with a different message is expected to refresh it.
    def failRun(gmtime, message):
        # One harvest run at the given (fake) time that ends in ValueError(message).
        state = State(self.tempdir, 'repo')
        state._gmtime = lambda: gmtime
        state.markStarted()
        try:
            raise ValueError(message)
        except ValueError:
            exType, exValue, exTraceback = exc_info()
            state.markException(exType, exValue, "9999/9999/9999/9999")
        state.close()

    def runningState():
        with open(join(self.tempdir, 'repo.running')) as fp:
            return jsonLoad(fp)

    failRun((2012, 8, 13, 12, 15, 0, 0, 0, 0), "the same exception")
    self.assertEqual(
        {"changedate": "2012-08-13 12:15:00", "status": "Error", "message": "the same exception"},
        runningState())

    failRun((2012, 8, 13, 12, 17, 0, 0, 0, 0), "the same exception")
    self.assertEqual(
        {"changedate": "2012-08-13 12:15:00", "status": "Error", "message": "the same exception"},
        runningState())

    failRun((2012, 8, 13, 12, 19, 0, 0, 0, 0), "the other exception")
    self.assertEqual(
        {"changedate": "2012-08-13 12:19:00", "status": "Error", "message": "the other exception"},
        runningState())
def testMarkHarvesterAfterExceptionChange(self):
    # A failing run followed by a successful harvest: 'repo.running' is expected
    # to switch from status "Error" to "Ok" with the changedate of the second run.
    def runningState():
        with open(join(self.tempdir, 'repo.running')) as fp:
            return jsonLoad(fp)

    state = State(self.tempdir, 'repo')
    state._gmtime = lambda: (2012, 8, 13, 12, 15, 0, 0, 0, 0)
    state.markStarted()
    try:
        raise ValueError("whatever")
    except ValueError:
        exType, exValue, exTraceback = exc_info()
        state.markException(exType, exValue, "9999/9999/9999/9999")
    state.close()
    self.assertEqual(
        {"changedate": "2012-08-13 12:15:00", "status": "Error", "message": "whatever"},
        runningState())

    state = State(self.tempdir, 'repo')
    state._gmtime = lambda: (2012, 8, 13, 12, 17, 0, 0, 0, 0)
    state.markStarted()
    state.markHarvested("9999/9999/9999/9999", "resumptionToken", "2012-08-13T12:14:00")
    state.close()
    self.assertEqual(
        {"changedate": "2012-08-13 12:17:00", "status": "Ok", "message": ""},
        runningState())
def testMarkException(self):
    # A successful harvest followed by a failing run: the stats file is expected
    # to gain one line per run, and 'repo.running' to end with status "Error".
    def readFile(name):
        with open(join(self.tempdir, name)) as fp:
            return fp.read()

    def readJson(name):
        with open(join(self.tempdir, name)) as fp:
            return jsonLoad(fp)

    state = State(self.tempdir, 'repo')
    state._gmtime = lambda: (2012, 8, 13, 12, 15, 0, 0, 0, 0)
    state.markStarted()
    state.markHarvested("9999/9999/9999/9999", "resumptionToken", "2012-08-13T12:14:00")
    state.close()
    self.assertEqual(
        'Started: 2012-08-13 12:15:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Done: 2012-08-13 12:15:00, ResumptionToken: resumptionToken\n',
        readFile('repo.stats'))
    self.assertEqual(
        '{"from": "2012-08-13T12:14:00", "resumptionToken": "resumptionToken"}',
        readFile('repo.next'))
    self.assertEqual(
        {"changedate": "2012-08-13 12:15:00", "status": "Ok", "message": ""},
        readJson('repo.running'))

    state = State(self.tempdir, 'repo')
    state._gmtime = lambda: (2012, 8, 13, 12, 17, 0, 0, 0, 0)
    state.markStarted()
    try:
        raise ValueError("whatever")
    except ValueError:
        exType, exValue, exTraceback = exc_info()
        state.markException(exType, exValue, "9999/9999/9999/9999")
    state.close()
    # One newline-terminated log line per run.
    self.assertEqual(
        "Started: 2012-08-13 12:15:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Done: 2012-08-13 12:15:00, ResumptionToken: resumptionToken\n"
        "Started: 2012-08-13 12:17:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Error: <type 'exceptions.ValueError'>: whatever\n",
        readFile('repo.stats'))
    self.assertEqual(
        {"changedate": "2012-08-13 12:17:00", "status": "Error", "message": "whatever"},
        readJson('repo.running'))
def testMarkDeleted(self):
    # A successful harvest followed by markDeleted(): the stats file is expected
    # to gain a "Deleted all ids." line, 'repo.next' to be emptied, and
    # 'repo.running' to stay as the harvest left it.
    def readFile(name):
        with open(join(self.tempdir, name)) as fp:
            return fp.read()

    def readJson(name):
        with open(join(self.tempdir, name)) as fp:
            return jsonLoad(fp)

    state = State(self.tempdir, 'repo')
    state._gmtime = lambda: (2012, 8, 13, 12, 15, 0, 0, 0, 0)
    state.markStarted()
    state.markHarvested("9999/9999/9999/9999", "resumptionToken", "2012-08-13T12:14:00")
    state.close()
    self.assertEqual(
        'Started: 2012-08-13 12:15:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Done: 2012-08-13 12:15:00, ResumptionToken: resumptionToken\n',
        readFile('repo.stats'))
    self.assertEqual(
        '{"from": "2012-08-13T12:14:00", "resumptionToken": "resumptionToken"}',
        readFile('repo.next'))
    self.assertEqual(
        {"changedate": "2012-08-13 12:15:00", "status": "Ok", "message": ""},
        readJson('repo.running'))

    state = State(self.tempdir, 'repo')
    state._gmtime = lambda: (2012, 8, 13, 12, 17, 0, 0, 0, 0)
    state.markDeleted()
    state.close()
    # One newline-terminated log line per run.
    self.assertEqual(
        "Started: 2012-08-13 12:15:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Done: 2012-08-13 12:15:00, ResumptionToken: resumptionToken\n"
        "Started: 2012-08-13 12:17:00, Harvested/Uploaded/Deleted/Total: 0/0/0/0, Done: Deleted all ids.\n",
        readFile('repo.stats'))
    self.assertEqual('{"from": "", "resumptionToken": ""}', readFile('repo.next'))
    self.assertEqual(
        {"changedate": "2012-08-13 12:15:00", "status": "Ok", "message": ""},
        readJson('repo.running'))
def readState(self):
    """Load self.resumptionToken and self.lastHarvest from the state file.

    The state is the JSON document named STATE_FILENAME inside self.directory;
    it must contain the keys 'resumptionToken' and 'lastHarvest'.
    """
    # Read inside a with-block so the handle is closed deterministically.
    with open(join(self.directory, STATE_FILENAME)) as fp:
        jsonData = jsonLoad(fp)
    self.resumptionToken = jsonData['resumptionToken']
    self.lastHarvest = jsonData['lastHarvest']
def _jsonLoad(filename):
    """Return the decoded content of the JSON file `filename`.

    The file is closed again before returning, also when decoding fails.
    """
    handle = open(filename)
    try:
        return jsonLoad(handle)
    finally:
        handle.close()
def _markRunningState(self, status, message=""):
    """Record `status` and `message` in the running-state file when they changed.

    The file self._runningFilename is rewritten, with self.getTime() as
    'changedate', only when it does not exist yet or when the stored 'status'
    or 'message' differs from the given one. An unchanged state therefore keeps
    its original 'changedate'.
    """
    runningDict = {}
    if isfile(self._runningFilename):
        with open(self._runningFilename) as fp:
            runningDict = jsonLoad(fp)
    if status != runningDict.get('status', None) or message != runningDict.get('message', None):
        # Build the new state before opening for writing, so the existing file
        # is not truncated should getTime() fail.
        newState = {'changedate': self.getTime(), 'status': status, 'message': message}
        # The with-block guarantees the JSON is flushed and the handle closed.
        with open(self._runningFilename, 'w') as fp:
            jsonDump(newState, fp)