Exemplo n.º 1
0
 def getRunningStatesForDomain(self, domainId):
     """Collect the running-state records of the repositories in a domain.

     Walks every repository group id and repository id that ``self.call``
     reports for ``domainId``. For each repository whose
     ``<repoId>.running`` file (filename escaped) exists below
     ``self._statePath/domainId``, the JSON content is loaded and passed
     through ``mergeDicts`` together with ``{'repositoryId': repoId}``.

     Returns the resulting list sorted on 'changedate' in descending order.
     """
     states = []
     for groupId in self.call.getRepositoryGroupIds(domainId=domainId):
         for repoId in self.call.getRepositoryIds(domainId=domainId, repositoryGroupId=groupId):
             filepath = join(self._statePath, domainId, escapeFilename("%s.running" % repoId))
             if not isfile(filepath):
                 continue
             # Close each state file deterministically instead of leaving the
             # handle to the garbage collector.
             with open(filepath) as fp:
                 states.append(mergeDicts(jsonLoad(fp), {'repositoryId': repoId}))
     return sorted(states, key=lambda d: d['changedate'], reverse=True)
 def read(directory):
     """Build a Repository from the configuration file found in `directory`.

     Loads the JSON file named CONFIG_FILENAME from `directory`, constructs a
     Repository from its 'name', 'baseUrl', 'metadataPrefix', 'setSpec',
     'active' and 'apiKey' entries, calls readState() on it and returns it.
     """
     # Use a context manager so the config file is closed right after parsing.
     with open(join(directory, CONFIG_FILENAME)) as fp:
         jsonData = jsonLoad(fp)
     repository = Repository(
         name=jsonData['name'],
         baseUrl=jsonData['baseUrl'],
         metadataPrefix=jsonData['metadataPrefix'],
         setSpec=jsonData['setSpec'],
         active=jsonData['active'],
         apiKey=jsonData['apiKey'],
         directory=directory
     )
     repository.readState()
     return repository
Exemplo n.º 3
0
 def testDownloadError(self):
     # An IOError raised by downloadBatch must propagate out of harvest(),
     # leave an error state in the 'state' file and its message in 'last_error'.
     def downloadBatchRaises(resumptionAttributes):
         raise IOError('help!')
     self.observer.methods['downloadBatch'] = downloadBatchRaises

     self.assertRaises(IOError, self.harvester.harvest)

     with open(join(self.tempdir, 'state')) as fp:
         persistedState = jsonLoad(fp)
     self.assertEqual({
         'harvestingReady': False,
         'datetime': '1976-11-08T12:34:56Z',
         'resumptionAttributes': None,
         'error': True}, persistedState)
     with open(join(self.tempdir, 'last_error')) as fp:
         lastError = fp.read()
     self.assertTrue('help!' in lastError, lastError)
Exemplo n.º 4
0
    def _readState(self):
        """Restore self.token and self.from_ from previously persisted state.

        When the resumption file exists, its 'resumptionToken' and 'from'
        values are used; empty or missing values become None.

        Otherwise, when the stats file exists, its non-error log lines are
        replayed to derive the values, and both are reset to None when the
        last such line is recognised by self._isDeleted.

        When neither file exists, self.token and self.from_ are left untouched.
        """
        if isfile(self._resumptionFilename):
            with open(self._resumptionFilename) as fp:
                values = jsonLoad(fp)
            self.token = values.get('resumptionToken', None) or None
            self.from_ = values.get('from', '') or None
            return

        # The mechanism below will only be carried out once in case the resumption file does not yet exist.
        if isfile(self._statsfilename):
            self._statsfile = open(self._statsfilename)
            try:
                logline = None
                for logline in self._filterNonErrorLogLine(self._statsfile):
                    if not self.token:
                        self.from_ = getStartDate(logline)
                    self.token = getResumptionToken(logline)
                if logline and self._isDeleted(logline):
                    self.from_ = None
                    self.token = None
            finally:
                # Always release the handle, even when a log line cannot be parsed.
                self._statsfile.close()
Exemplo n.º 5
0
 def testConvertError(self):
     # A RuntimeError raised by convert must propagate out of harvest(),
     # leave an error state in the 'state' file and its message in 'last_error'.
     batch = Batch()
     batch.records = [Record('id0', 'data0'), Record('id1', 'data1')]
     batch.harvestingReady = True
     self.observer.methods['downloadBatch'] = lambda **kwargs: batch
     def convertRaises(record):
         raise RuntimeError('help!')
     self.observer.methods['convert'] = convertRaises

     self.assertRaises(RuntimeError, self.harvester.harvest)

     with open(join(self.tempdir, 'state')) as fp:
         persistedState = jsonLoad(fp)
     self.assertEqual({
         'harvestingReady': False,
         'datetime': '1976-11-08T12:34:56Z',
         'resumptionAttributes': None,
         'error': True}, persistedState)
     with open(join(self.tempdir, 'last_error')) as fp:
         lastError = fp.read()
     self.assertTrue('help!' in lastError, lastError)
Exemplo n.º 6
0
    def __init__(self, stateDir, port, configFile):
        """Prepare the Solr state directory described by a JSON core configuration.

        stateDir   -- directory for the Solr state. When it does not exist it is
                      copied from the 'solr-data' tree under usrShareDir; when it
                      does, every core is version-checked and 'lib' is refreshed.
        port       -- stored as self.port.
        configFile -- path of a JSON file mapping each core name to a dict of
                      feature name -> options.

        Raises AssertionError when a core's description is not a dict, and
        ValueError when an existing core's luceneMatchVersion differs from the
        one in the packaged 'core-data' solrconfig.xml.
        """
        with open(configFile) as configFp:
            self.config = jsonLoad(configFp)
        self.stateDir = stateDir
        self.port = port
        self.configBasedir = dirname(abspath(configFile))
        assert all(
            isinstance(v, dict) for v in self.config.values()
        ), "Core feature descriptions must be a dictionary (empty for no additional features)."

        if not isdir(self.stateDir):
            copytree(join(usrShareDir, 'solr-data'), self.stateDir)
        else:
            # Files are opened in context managers so every handle is closed
            # as soon as the version string has been extracted.
            with open(join(usrShareDir, 'core-data', 'conf', 'solrconfig.xml')) as fp:
                newMatchVersion = parse(fp).xpath("//luceneMatchVersion/text()")[0]
            for coreDir in listdir(join(self.stateDir, 'cores')):
                with open(join(self.stateDir, 'cores', coreDir, 'conf', 'solrconfig.xml')) as fp:
                    currentMatchVersion = parse(fp).xpath("//luceneMatchVersion/text()")[0]
                if currentMatchVersion != newMatchVersion:
                    raise ValueError(
                        "LuceneMatchVersion in core '%s' does not match the new configuration. Remove the old index."
                        % coreDir)
            # Only replace the libraries after all cores passed the version check.
            rmtree(join(self.stateDir, 'lib'))
            copytree(join(usrShareDir, 'solr-data', 'lib'),
                     join(self.stateDir, 'lib'))

        self._setupJettyXml()
        self._setupStartConfig()
        self._setupSolrXml()
        self._setupCoreData()

        self._setFeatureDefaults('autoCommit', {'autoCommitMaxTime': 1000})
        for core, features in self.config.items():
            for feature, options in features.items():
                self._setupFeature(name=feature, core=core, options=options)
Exemplo n.º 7
0
    def __init__(self, stateDir, port, configFile):
        """Prepare the Solr state directory described by a JSON core configuration.

        stateDir   -- directory for the Solr state. When it does not exist it is
                      copied from the "solr-data" tree under usrShareDir; when it
                      does, every core is version-checked and "lib" is refreshed.
        port       -- stored as self.port.
        configFile -- path of a JSON file mapping each core name to a dict of
                      feature name -> options.

        Raises AssertionError when a core's description is not a dict, and
        ValueError when an existing core's luceneMatchVersion differs from the
        one in the packaged "core-data" solrconfig.xml.
        """
        with open(configFile) as configFp:
            self.config = jsonLoad(configFp)
        self.stateDir = stateDir
        self.port = port
        self.configBasedir = dirname(abspath(configFile))
        assert all(
            isinstance(v, dict) for v in self.config.values()
        ), "Core feature descriptions must be a dictionary (empty for no additional features)."

        if not isdir(self.stateDir):
            copytree(join(usrShareDir, "solr-data"), self.stateDir)
        else:
            # Files are opened in context managers so every handle is closed
            # as soon as the version string has been extracted.
            with open(join(usrShareDir, "core-data", "conf", "solrconfig.xml")) as fp:
                newMatchVersion = parse(fp).xpath("//luceneMatchVersion/text()")[0]
            for coreDir in listdir(join(self.stateDir, "cores")):
                with open(join(self.stateDir, "cores", coreDir, "conf", "solrconfig.xml")) as fp:
                    currentMatchVersion = parse(fp).xpath("//luceneMatchVersion/text()")[0]
                if currentMatchVersion != newMatchVersion:
                    raise ValueError(
                        "LuceneMatchVersion in core '%s' does not match the new configuration. Remove the old index."
                        % coreDir
                    )
            # Only replace the libraries after all cores passed the version check.
            rmtree(join(self.stateDir, "lib"))
            copytree(join(usrShareDir, "solr-data", "lib"), join(self.stateDir, "lib"))

        self._setupJettyXml()
        self._setupStartConfig()
        self._setupSolrXml()
        self._setupCoreData()

        self._setFeatureDefaults("autoCommit", {"autoCommitMaxTime": 1000})
        for core, features in self.config.items():
            for feature, options in features.items():
                self._setupFeature(name=feature, core=core, options=options)
Exemplo n.º 8
0
    def testMarkExceptionChange(self):
        # The 'repo.running' file must keep its changedate while the same error
        # repeats, and only be rewritten once the error message changes.
        def readRunning():
            with open(join(self.tempdir, 'repo.running')) as fp:
                return jsonLoad(fp)

        def failAt(minute, message):
            # One harvest attempt at 12:<minute> that ends in ValueError(message).
            state = State(self.tempdir, 'repo')
            state._gmtime = lambda: (2012, 8, 13, 12, minute, 0, 0, 0, 0)
            state.markStarted()
            try:
                raise ValueError(message)
            except ValueError:
                exType, exValue, _ = exc_info()
                state.markException(exType, exValue, "9999/9999/9999/9999")
            state.close()

        failAt(15, "the same exception")
        self.assertEqual({"changedate": "2012-08-13 12:15:00", "status": "Error", "message": "the same exception"}, readRunning())

        failAt(17, "the same exception")
        self.assertEqual({"changedate": "2012-08-13 12:15:00", "status": "Error", "message": "the same exception"}, readRunning())

        failAt(19, "the other exception")
        self.assertEqual({"changedate": "2012-08-13 12:19:00", "status": "Error", "message": "the other exception"}, readRunning())
Exemplo n.º 9
0
    def testMarkHarvesterAfterExceptionChange(self):
        # After an error, a successful harvest must flip 'repo.running' back to
        # status "Ok" with an empty message and a new changedate.
        def readRunning():
            with open(join(self.tempdir, 'repo.running')) as fp:
                return jsonLoad(fp)

        state = State(self.tempdir, 'repo')
        state._gmtime = lambda: (2012, 8, 13, 12, 15, 0, 0, 0, 0)
        state.markStarted()
        try:
            raise ValueError("whatever")
        except ValueError:
            exType, exValue, _ = exc_info()
            state.markException(exType, exValue, "9999/9999/9999/9999")
        state.close()
        self.assertEqual({"changedate": "2012-08-13 12:15:00", "status": "Error", "message": "whatever"}, readRunning())

        state = State(self.tempdir, 'repo')
        state._gmtime = lambda: (2012, 8, 13, 12, 17, 0, 0, 0, 0)
        state.markStarted()
        state.markHarvested("9999/9999/9999/9999", "resumptionToken", "2012-08-13T12:14:00")
        state.close()
        self.assertEqual({"changedate": "2012-08-13 12:17:00", "status": "Ok", "message": ""}, readRunning())
Exemplo n.º 10
0
    def testMarkException(self):
        state = State(self.tempdir, 'repo')
        state._gmtime = lambda: (2012, 8, 13, 12, 15, 0, 0, 0, 0)
        state.markStarted()
        state.markHarvested("9999/9999/9999/9999", "resumptionToken", "2012-08-13T12:14:00")
        state.close()

        self.assertEquals('Started: 2012-08-13 12:15:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Done: 2012-08-13 12:15:00, ResumptionToken: resumptionToken\n', open(join(self.tempdir, 'repo.stats')).read())
        self.assertEquals('{"from": "2012-08-13T12:14:00", "resumptionToken": "resumptionToken"}', open(join(self.tempdir, 'repo.next')).read())
        self.assertEquals({"changedate": "2012-08-13 12:15:00", "status": "Ok", "message": ""}, jsonLoad(open(join(self.tempdir, 'repo.running'))))

        state = State(self.tempdir, 'repo')
        state._gmtime = lambda: (2012, 8, 13, 12, 17, 0, 0, 0, 0)
        state.markStarted()
        try:
            raise ValueError("whatever")
        except:
            exType, exValue, exTraceback = exc_info()
            state.markException(exType, exValue, "9999/9999/9999/9999")
        state.close()
        self.assertEquals("""Started: 2012-08-13 12:15:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Done: 2012-08-13 12:15:00, ResumptionToken: resumptionToken
Started: 2012-08-13 12:17:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Error: <type 'exceptions.ValueError'>: whatever
""", open(join(self.tempdir, 'repo.stats')).read())
        self.assertEquals({"changedate": "2012-08-13 12:17:00", "status": "Error", "message": "whatever"}, jsonLoad(open(join(self.tempdir, 'repo.running'))))
Exemplo n.º 11
0
    def testMarkDeleted(self):
        state = State(self.tempdir, 'repo')
        state._gmtime = lambda: (2012, 8, 13, 12, 15, 0, 0, 0, 0)
        state.markStarted()
        state.markHarvested("9999/9999/9999/9999", "resumptionToken", "2012-08-13T12:14:00")
        state.close()

        self.assertEquals('Started: 2012-08-13 12:15:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Done: 2012-08-13 12:15:00, ResumptionToken: resumptionToken\n', open(join(self.tempdir, 'repo.stats')).read())
        self.assertEquals('{"from": "2012-08-13T12:14:00", "resumptionToken": "resumptionToken"}', open(join(self.tempdir, 'repo.next')).read())

        self.assertEquals({"changedate": "2012-08-13 12:15:00", "status": "Ok", "message": ""}, jsonLoad(open(join(self.tempdir, 'repo.running'))))

        state = State(self.tempdir, 'repo')
        state._gmtime = lambda: (2012, 8, 13, 12, 17, 0, 0, 0, 0)
        state.markDeleted()
        state.close()

        self.assertEquals("""Started: 2012-08-13 12:15:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Done: 2012-08-13 12:15:00, ResumptionToken: resumptionToken
Started: 2012-08-13 12:17:00, Harvested/Uploaded/Deleted/Total: 0/0/0/0, Done: Deleted all ids.
""", open(join(self.tempdir, 'repo.stats')).read())
        self.assertEquals('{"from": "", "resumptionToken": ""}', open(join(self.tempdir, 'repo.next')).read())
        self.assertEquals({"changedate": "2012-08-13 12:15:00", "status": "Ok", "message": ""}, jsonLoad(open(join(self.tempdir, 'repo.running'))))
 def readState(self):
     """Load resumptionToken and lastHarvest from the state file in self.directory.

     Reads the JSON file named STATE_FILENAME and copies its 'resumptionToken'
     and 'lastHarvest' entries onto this object.
     """
     # Use a context manager so the state file is closed right after parsing.
     with open(join(self.directory, STATE_FILENAME)) as fp:
         jsonData = jsonLoad(fp)
     self.resumptionToken = jsonData['resumptionToken']
     self.lastHarvest = jsonData['lastHarvest']
Exemplo n.º 13
0
 def _jsonLoad(filename):
     """Open `filename`, parse its content as JSON and return the parsed object."""
     with open(filename) as jsonFile:
         parsed = jsonLoad(jsonFile)
     return parsed
Exemplo n.º 14
0
 def _markRunningState(self, status, message=""):
     """Persist the running status, rewriting the file only when it changes.

     The JSON file self._runningFilename is rewritten with 'changedate'
     (from self.getTime()), 'status' and 'message' when either the status or
     the message differs from what the file currently holds, or when the
     file does not exist yet. Otherwise the file, and thus its changedate,
     is left untouched.
     """
     runningDict = {}
     if isfile(self._runningFilename):
         with open(self._runningFilename) as fp:
             runningDict = jsonLoad(fp)
     if status != runningDict.get('status', None) or message != runningDict.get('message', None):
         # Build the record before opening for writing, so a failing getTime()
         # cannot leave a truncated file behind.
         newState = {'changedate': self.getTime(), 'status': status, 'message': message}
         # Closing the handle explicitly guarantees the record is flushed to disk.
         with open(self._runningFilename, 'w') as fp:
             jsonDump(newState, fp)