def test_ids_parse(self):
    """Check the results of ``Ids.parse`` on the fixtures ``self.s`` and ``self.s1``.

    The parse of ``self.s`` must report ``self.c`` as its ``chrForm`` and be
    neither a char nor an element; the parse of ``self.s1`` must report
    ``self.c1`` as its ``chrForm``.
    """
    # First fixture: character form plus both kind flags.
    parsed = Ids.parse(self.s)
    self.assertEqual(self.c, parsed.chrForm)
    self.assertFalse(parsed.isChar)
    self.assertFalse(parsed.isElement)

    # Second fixture: only the character form is checked.
    parsedOther = Ids.parse(self.s1)
    self.assertEqual(self.c1, parsedOther.chrForm)
def __init__(self, stateDir, logDir, name):
    """Create the id stores, state and event logger for the log called ``name``.

    Both ``logDir`` and ``stateDir`` are passed to ``ensureDirectory`` before
    anything is opened in them. Two ``Ids`` stores are created in
    ``stateDir``: one under ``name`` and one under ``name + "_invalid"``.
    The event log is opened at ``<logDir>/<name>.events`` and the
    per-run counters are reset.
    """
    self._name = name
    self._logDir = logDir

    # Make sure both directories are handled before any store is created.
    for directory in (logDir, stateDir):
        ensureDirectory(directory)

    self._ids = Ids(stateDir, name)
    self._invalidIds = Ids(stateDir, name + "_invalid")
    self._state = State(stateDir, name)

    eventsPath = logDir + '/' + name + '.events'
    self._eventlogger = EventLogger(eventsPath)

    self._resetCounts()
def test_char(self):
    """Round-trip the fixture ``self.c`` through ``elaborate`` and ``synthesize``.

    After ``elaborate()`` the string form must equal ``self.s`` and
    ``isElaborated`` must be true; after ``synthesize()`` the ``chrForm``
    must equal ``self.c`` again and ``isElaborated`` must be false.
    Finally the component '氵' must be reported as contained in the parsed
    object.
    """
    i = Ids.parse(self.c)
    self.assertEqual(self.s, str(i.elaborate()))
    # assertTrue replaces the deprecated ``assert_`` alias (removed in Python 3.12).
    self.assertTrue(i.isElaborated)
    self.assertEqual(self.c, i.synthesize().chrForm)
    self.assertFalse(i.isElaborated)
    # assertIn performs the same ``in`` check but reports both operands on failure.
    self.assertIn('氵', i)
class HarvesterLog(object):
    """Bookkeeping for one named harvest: ids, invalid ids, state and event log.

    The class keeps three in-memory counters (harvested, uploaded, deleted)
    and delegates storage to the ``Ids``, ``State`` and ``EventLogger``
    collaborators created in ``__init__``.
    """

    def __init__(self, stateDir, logDir, name):
        """Create the id stores, state and event logger for ``name``.

        Both directories are passed to ``ensureDirectory`` first. The event
        log is opened at ``<logDir>/<name>.events``.
        """
        self._name = name
        self._logDir = logDir
        ensureDirectory(logDir)
        ensureDirectory(stateDir)
        self._ids = Ids(stateDir, name)
        self._invalidIds = Ids(stateDir, name + "_invalid")
        self._state = State(stateDir, name)
        self._eventlogger = EventLogger(logDir + '/' + name + '.events')
        self._resetCounts()

    def isCurrentDay(self, date):
        """Return True when ``date`` falls on the same day as the state's time.

        Compares the part of ``date`` before the first 'T' with the first
        whitespace-separated field of ``self._state.getTime()``.
        """
        return date.split('T')[0] == self._state.getTime().split()[0]

    def startRepository(self):
        """Reset the counters and mark the state as started."""
        self._resetCounts()
        self._state.markStarted()

    def _resetCounts(self):
        """Set the harvested, uploaded and deleted counters back to zero."""
        self._harvestedCount = 0
        self._uploadedCount = 0
        self._deletedCount = 0

    def totalIds(self):
        """Return the number of ids currently held in the id store."""
        return len(self._ids)

    def totalInvalidIds(self):
        """Return the number of ids currently held in the invalid-id store."""
        return len(self._invalidIds)

    def eventLogger(self):
        """Return the underlying event logger."""
        # Should be removed, but is still used in Harvester.
        return self._eventlogger

    def markDeleted(self):
        """Clear all ids, mark the state as deleted and log the deletion."""
        self._ids.clear()
        self._state.markDeleted()
        self._eventlogger.logSuccess('Harvested/Uploaded/Deleted/Total: 0/0/0/0, Done: Deleted all ids.', id=self._name)

    def endRepository(self, token, responseDate):
        """Record a finished harvest step in the state and the event log.

        The counts summary, ``token`` and ``responseDate`` are handed to
        ``State.markHarvested``; the summary and token are logged as success.
        """
        self._state.markHarvested(self.countsSummary(), token, responseDate)
        self._eventlogger.logSuccess('Harvested/Uploaded/Deleted/Total: %s, ResumptionToken: %s' % (self.countsSummary(), token), id=self._name)

    def endWithException(self, exType, exValue, exTb):
        """Record a failed harvest step in the state and the event log.

        The formatted traceback lines are stripped and joined with '|' so the
        whole error is logged as a single message.
        """
        self._state.markException(exType, exValue, self.countsSummary())
        error = '|'.join(str.strip(s) for s in traceback.format_exception(exType, exValue, exTb))
        self._eventlogger.logError(error, id=self._name)

    def countsSummary(self):
        """Return the counters as 'harvested/uploaded/deleted/total'."""
        return '%d/%d/%d/%d' % (self._harvestedCount, self._uploadedCount, self._deletedCount, self.totalIds())

    def close(self):
        """Close the event logger, both id stores and the state."""
        self._eventlogger.close()
        self._ids.close()
        self._invalidIds.close()
        self._state.close()

    def notifyHarvestedRecord(self, uploadid):
        """Count a harvested record and drop any invalid-data entry for it."""
        self._removeFromInvalidData(uploadid)
        self._harvestedCount += 1

    def uploadIdentifier(self, uploadid):
        """Add ``uploadid`` to the id store and count it as uploaded."""
        self._ids.add(uploadid)
        self._uploadedCount += 1

    def deleteIdentifier(self, uploadid):
        """Remove ``uploadid`` from the id store and count it as deleted."""
        self._ids.remove(uploadid)
        self._deletedCount += 1

    def logInvalidData(self, uploadid, message):
        """Register ``uploadid`` as invalid and write ``message`` to its file.

        The message file path comes from ``_invalidDataMessageFilePath``; its
        parent directory is passed to ``ensureDirectory`` before writing.
        """
        self._invalidIds.add(uploadid)
        filePath = self._invalidDataMessageFilePath(uploadid)
        ensureDirectory(dirname(filePath))
        # Use a context manager so the handle is flushed and closed
        # deterministically instead of relying on garbage collection.
        with open(filePath, 'w') as messageFile:
            messageFile.write(message)

    def logIgnoredIdentifierWarning(self, uploadid):
        """Log an 'IGNORED' warning for ``uploadid``."""
        self._eventlogger.logWarning('IGNORED', uploadid)

    def clearInvalidData(self, repositoryId):
        """Forget all invalid ids of ``repositoryId`` and remove its message directory.

        Every invalid id starting with ``"<repositoryId>:"`` is removed from
        the invalid-id store, after which the repository's directory below
        ``INVALID_DATA_MESSAGES_DIR`` is deleted with ``rmtree``.
        """
        prefix = "%s:" % repositoryId
        # Iterate over a copy: the store is modified inside the loop.
        for invalidId in list(self._invalidIds):
            if invalidId.startswith(prefix):
                self._invalidIds.remove(invalidId)
        # NOTE(review): message files are written below
        # escapeFilename(repositoryId) (see _invalidDataMessageFilePath), while
        # the raw repositoryId is used here - confirm these always coincide.
        rmtree(join(self._logDir, INVALID_DATA_MESSAGES_DIR, repositoryId))

    def hasWork(self, continuousInterval=None):
        """Tell whether there is something to harvest.

        With ``continuousInterval`` given: true when the state has no
        ``from_`` value, or when the difference between the current
        ``ZuluTime`` epoch and the epoch of ``from_`` exceeds the interval.
        A ``from_`` without a time part is treated as midnight UTC.

        Without ``continuousInterval``: truthy when the state holds a token
        (the token itself is returned in that case), when ``from_`` is None,
        or when ``from_`` is not on the current day.
        """
        if continuousInterval is not None:
            from_ = self._state.from_
            if from_ and 'T' not in from_:
                from_ += "T00:00:00Z"
            return from_ is None or ZuluTime().epoch - ZuluTime(from_).epoch > continuousInterval
        return self._state.token or self._state.from_ is None or not self.isCurrentDay(self._state.from_)

    def state(self):
        """Return the underlying state object."""
        return self._state

    def invalidIds(self):
        """Return the invalid ids as a new list."""
        return list(self._invalidIds)

    def _removeFromInvalidData(self, uploadid):
        """Drop ``uploadid`` from the invalid-id store and delete its message file, if any."""
        self._invalidIds.remove(uploadid)
        invalidDataMessageFilePath = self._invalidDataMessageFilePath(uploadid)
        if isfile(invalidDataMessageFilePath):
            remove(invalidDataMessageFilePath)

    def _invalidDataMessageFilePath(self, uploadid):
        """Return the message file path for ``uploadid``.

        ``uploadid`` is split on its first ':' into repository id and record
        id; both parts are passed through ``escapeFilename`` and joined below
        ``<logDir>/INVALID_DATA_MESSAGES_DIR``.
        """
        repositoryId, recordId = uploadid.split(":", 1)
        return join(self._logDir, INVALID_DATA_MESSAGES_DIR, escapeFilename(repositoryId), escapeFilename(recordId))