Example #1
0
class FWJRDBAPITest(unittest.TestCase):
    """Unit tests for the FWJRDBAPI couch interface."""

    def setUp(self):
        """Create a test couch database loaded with the FWJRDump couchapp."""
        self.schema = []
        self.couchApps = ["FWJRDump"]
        self.testInit = TestInitCouchApp('FWJRAPITest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        testDbName = 'fwjrdump_t'
        self.testInit.setupCouch(testDbName, *self.couchApps)
        fwjrDBURL = "/".join([self.testInit.couchUrl, testDbName])
        self.fwjrAPI = FWJRDBAPI(fwjrDBURL)
        # never query stale views, so inserts are visible immediately
        self.fwjrAPI.defaultStale = {}

    def tearDown(self):
        """Drop the couch database created in setUp."""
        self.testInit.tearDownCouch()

    def testFWJRDBAPI(self):
        """Verify document commit, archive-status queries and status updates."""
        jobID = 1
        retryCount = 0
        docId = "{0}-{1}".format(jobID, retryCount)
        fwjrDocument = dict(_id=docId,
                            jobid=jobID,
                            retrycount=retryCount,
                            archivestatus="ready",
                            fwjr=SAMPLE_FWJR,
                            jobtype="Merge",
                            type="fwjr")
        workflow = SAMPLE_FWJR['task'].split('/')[1]

        self.assertEqual(self.fwjrAPI.couchDB.commitOne(fwjrDocument)[0]['id'], docId)
        self.assertEqual(self.fwjrAPI.getFWJRByArchiveStatus("ready")['rows'][0]['id'], docId)
        self.assertEqual(self.fwjrAPI.isAllFWJRArchived(workflow), False)
        self.fwjrAPI.updateArchiveUploadedStatus(docId)
        self.assertEqual(self.fwjrAPI.getFWJRByArchiveStatus("uploaded")['rows'][0]['id'], docId)
        self.assertEqual(self.fwjrAPI.isAllFWJRArchived(workflow), True)
        self.assertEqual(self.fwjrAPI.getFWJRWithSkippedFiles()['rows'][0]['value']['skippedFiles'], 2)
Example #2
0
class FWJRDBAPITest(unittest.TestCase):
    """Round-trip tests for FWJRDBAPI on a throwaway couch database."""

    def setUp(self):
        """Spin up couch with the FWJRDump app and point FWJRDBAPI at it."""
        self.schema = []
        self.couchApps = ["FWJRDump"]
        self.testInit = TestInitCouchApp("FWJRAPITest")
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        couchDbName = "fwjrdump_t"
        self.testInit.setupCouch(couchDbName, *self.couchApps)
        self.fwjrAPI = FWJRDBAPI("%s/%s" % (self.testInit.couchUrl, couchDbName))
        # force fresh (non-stale) view queries in the tests
        self.fwjrAPI.defaultStale = {}

    def tearDown(self):
        """Tear the couch database back down."""
        self.testInit.tearDownCouch()

    def testFWJRDBAPI(self):
        """Commit one FWJR doc and walk it through the archive states."""
        jobID, retryCount = 1, 0
        docId = "%s-%s" % (jobID, retryCount)
        fwjrDocument = {
            "_id": docId,
            "jobid": jobID,
            "retrycount": retryCount,
            "archivestatus": "ready",
            "fwjr": SAMPLE_FWJR,
            "jobtype": "Merge",
            "type": "fwjr",
        }
        workflow = SAMPLE_FWJR["task"].split("/")[1]

        commitResult = self.fwjrAPI.couchDB.commitOne(fwjrDocument)
        self.assertEqual(commitResult[0]["id"], docId)

        readyRows = self.fwjrAPI.getFWJRByArchiveStatus("ready")["rows"]
        self.assertEqual(readyRows[0]["id"], docId)
        self.assertEqual(self.fwjrAPI.isAllFWJRArchived(workflow), False)

        self.fwjrAPI.updateArchiveUploadedStatus(docId)
        uploadedRows = self.fwjrAPI.getFWJRByArchiveStatus("uploaded")["rows"]
        self.assertEqual(uploadedRows[0]["id"], docId)
        self.assertEqual(self.fwjrAPI.isAllFWJRArchived(workflow), True)

        skippedRows = self.fwjrAPI.getFWJRWithSkippedFiles()["rows"]
        self.assertEqual(skippedRows[0]["value"]["skippedFiles"], 2)
Example #3
0
class ArchiveDataPoller(BaseWorkerThread):
    """Poll FWJR couch documents and ship them to WMArchive in batches."""

    def __init__(self, config):
        """Store the component configuration."""
        BaseWorkerThread.__init__(self)
        self.config = config

    def setup(self, parameters):
        """Create the FWJR couch API and the WMArchive client."""
        couchURL = self.config.JobStateMachine.couchurl
        fwjrDBName = "%s/fwjrs" % getattr(self.config.JobStateMachine, "couchDBName")

        self.fwjrAPI = FWJRDBAPI(couchURL, fwjrDBName)
        reporterConfig = self.config.ArchiveDataReporter
        self.wmarchiver = WMArchive(reporterConfig.WMArchiveURL)
        self.numDocsRetrievePerPolling = getattr(reporterConfig, "numDocsRetrievePerPolling", 1000)
        self.numDocsUploadPerCall = getattr(reporterConfig, "numDocsUploadPerCall", 200)

    @timeFunction
    def algorithm(self, parameters):
        """Fetch 'ready' FWJR docs, archive them, and mark successes uploaded."""
        try:
            docRows = self.fwjrAPI.getFWJRByArchiveStatus('ready', limit=self.numDocsRetrievePerPolling)['rows']
            logging.info("Found %i not archived documents from FWRJ db to upload to WMArchive.", len(docRows))

            for batch in grouper(docRows, self.numDocsUploadPerCall):
                # single pass over the batch: grouper may hand back a one-shot iterable
                pairs = [(createArchiverDoc(job), job["id"]) for job in batch]
                archiveDocs = [doc for doc, _ in pairs]
                jobIDs = [docID for _, docID in pairs]

                response = self.wmarchiver.archiveData(archiveDocs)
                result = response[0]

                # Partial success is not allowed either all the insert is successful or none is
                if result['status'] == "ok" and len(result['ids']) == len(jobIDs):
                    archiveIDs = result['ids']
                    for docID in jobIDs:
                        self.fwjrAPI.updateArchiveUploadedStatus(docID)
                    logging.info("...successfully uploaded %d docs", len(jobIDs))
                    logging.debug("JobIDs uploaded: %s", jobIDs)
                    logging.debug("Archived IDs returned: %s", archiveIDs)
                else:
                    logging.warning("Upload failed and it will be retried in the next cycle: %s: %s.",
                                    result['status'], result['reason'])
                    logging.debug("failed JobIds %s", jobIDs)
        except Exception as ex:
            logging.error("Error occurred, will retry later:")
            logging.error(str(ex))
            logging.error("Trace back: \n%s", traceback.format_exc())
class ArchiveDataPoller(BaseWorkerThread):
    """
    Gather fwjr data and upload it to the archiver (WMArchive).
    """

    def __init__(self, config):
        """
        Initialize properties specified from config.

        :param config: component configuration object
        """
        BaseWorkerThread.__init__(self)
        self.config = config

    def setup(self, parameters):
        """
        Set db connection (couchdb, wmbs) to prepare to gather information.

        :param parameters: unused, required by the BaseWorkerThread interface
        """
        baseURL = self.config.JobStateMachine.couchurl
        dbname = "%s/fwjrs" % getattr(self.config.JobStateMachine, "couchDBName")

        self.fwjrAPI = FWJRDBAPI(baseURL, dbname)
        self.wmarchiver = WMArchive(self.config.ArchiveDataReporter.WMArchiveURL)
        self.numDocsRetrievePerPolling = getattr(self.config.ArchiveDataReporter, "numDocsRetrievePerPolling", 1000)
        self.numDocsUploadPerCall = getattr(self.config.ArchiveDataReporter, "numDocsUploadPerCall", 200)

    def algorithm(self, parameters):
        """
        Poll the FWJR couch for documents in 'ready' archive status, upload
        them to WMArchive in batches, and flag the uploaded ones.

        :param parameters: unused, required by the BaseWorkerThread interface
        """
        try:
            data = self.fwjrAPI.getFWJRByArchiveStatus('ready', limit=self.numDocsRetrievePerPolling)['rows']
            logging.info("Found %i not archived documents from FWRJ db to upload to WMArchive.", len(data))

            for slicedData in grouper(data, self.numDocsUploadPerCall):
                jobIDs = []
                archiveDocs = []
                for job in slicedData:
                    archiveDocs.append(createArchiverDoc(job))
                    jobIDs.append(job["id"])

                response = self.wmarchiver.archiveData(archiveDocs)

                # Partial success is not allowed: either the whole batch is
                # inserted or none of it is, so only then mark docs uploaded.
                if response[0]['status'] == "ok" and len(response[0]['ids']) == len(jobIDs):
                    archiveIDs = response[0]['ids']
                    for docID in jobIDs:
                        self.fwjrAPI.updateArchiveUploadedStatus(docID)
                    logging.info("...successfully uploaded %d docs", len(jobIDs))
                    logging.debug("JobIDs uploaded: %s", jobIDs)
                    logging.debug("Archived IDs returned: %s", archiveIDs)
                else:
                    # 'reason' may be absent when only the id-count check fails;
                    # use .get so a KeyError does not mask the real upload problem.
                    logging.warning("Upload failed and it will be retried in the next cycle: %s: %s.",
                                    response[0]['status'], response[0].get('reason', 'unknown'))
                    logging.debug("failed JobIds %s", jobIDs)
        except Exception:
            # logging.exception records the message plus the full traceback.
            logging.exception("Error occurred, will retry later:")
Example #5
0
class FWJRDBAPITest(unittest.TestCase):
    """
    Tests for the FWJRDBAPI couch wrapper.
    """

    def setUp(self):
        """
        _setUp_

        Set up a clean couch database loaded with the FWJRDump couchapp.
        """
        self.schema = []
        self.couchApps = ["FWJRDump"]
        self.testInit = TestInitCouchApp('FWJRAPITest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        # PEP8: no spaces around '=' for keyword arguments
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        dbName = 'fwjrdump_t'
        self.testInit.setupCouch(dbName, *self.couchApps)
        fwjrDBURL = "%s/%s" % (self.testInit.couchUrl, dbName)
        self.fwjrAPI = FWJRDBAPI(fwjrDBURL)
        # disable stale view options so queries always see fresh data
        self.fwjrAPI.defaultStale = {}
        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testFWJRDBAPI(self):
        """
        Insert an FWJR document and verify the archive-status round trip:
        commit -> 'ready' -> updateArchiveUploadedStatus -> 'uploaded'.
        """
        jobID = 1
        retryCount = 0
        fwjrDocument = {"_id": "%s-%s" % (jobID, retryCount),
                        "jobid": jobID,
                        "retrycount": retryCount,
                        "archivestatus": "ready",
                        "fwjr": SAMPLE_FWJR,
                        "type": "fwjr"}
        self.assertEqual(self.fwjrAPI.couchDB.commitOne(fwjrDocument)[0]['id'], fwjrDocument['_id'])
        self.assertEqual(self.fwjrAPI.getFWJRByArchiveStatus("ready")['rows'][0]['id'], fwjrDocument['_id'])
        self.fwjrAPI.updateArchiveUploadedStatus(fwjrDocument['_id'])
        self.assertEqual(self.fwjrAPI.getFWJRByArchiveStatus("uploaded")['rows'][0]['id'], fwjrDocument['_id'])
Example #6
0
class ArchiveDataPoller(BaseWorkerThread):
    """
    Gather fwjr data and upload it to the archiver (WMArchive).
    """

    def __init__(self, config):
        """
        Initialize properties specified from config.

        :param config: component configuration object
        """
        BaseWorkerThread.__init__(self)
        self.config = config

    def setup(self, parameters):
        """
        Set db connection (couchdb, wmbs) to prepare to gather information.

        :param parameters: unused, required by the BaseWorkerThread interface
        """
        baseURL = self.config.JobStateMachine.couchurl
        dbname = "%s/fwjrs" % getattr(self.config.JobStateMachine,
                                      "couchDBName")

        self.fwjrAPI = FWJRDBAPI(baseURL, dbname)
        self.wmarchiver = WMArchiver(
            self.config.ArchiveDataReporter.WMArchiverURL)

    def algorithm(self, parameters):
        """
        Poll couch for FWJR docs in 'ready' archive status, push them to
        WMArchive and mark each successfully uploaded doc.

        :param parameters: unused, required by the BaseWorkerThread interface
        """
        try:
            logging.info("Getting not archived data info from FWRJ db...")
            data = self.fwjrAPI.getFWJRByArchiveStatus('ready')['rows']

            # TODO: switch to a bulk status update / bulk archive call
            jobIDs = []
            archiverDocs = []
            for job in data:
                doc = self.wmarchiver.createArchiverDoc(
                    job["id"], job['doc']["fwjr"])
                archiverDocs.append(doc)
                jobIDs.append(job["id"])

            response = self.wmarchiver.archiveData(archiverDocs)

            # Partial success is not allowed: either all the inserts succeed or none do.
            if response[0]['status'] == "ok" and len(
                    response[0]['ids']) == len(jobIDs):
                for docID in jobIDs:
                    self.fwjrAPI.updateArchiveUploadedStatus(docID)
        except Exception as ex:
            logging.error("Error occurred, will retry later:")
            logging.error(str(ex))
            # Lazy %-args: let logging interpolate only if the record is emitted.
            logging.error("Trace back: \n%s", traceback.format_exc())
class ArchiveDataPoller(BaseWorkerThread):
    """
    Gather fwjr data and upload it to the archiver (WMArchive).
    """

    def __init__(self, config):
        """
        Initialize properties specified from config.

        :param config: component configuration object
        """
        BaseWorkerThread.__init__(self)
        self.config = config

    def setup(self, parameters):
        """
        Set db connection (couchdb, wmbs) to prepare to gather information.

        :param parameters: unused, required by the BaseWorkerThread interface
        """
        baseURL = self.config.JobStateMachine.couchurl
        dbname = "%s/fwjrs" % getattr(self.config.JobStateMachine, "couchDBName")

        self.fwjrAPI = FWJRDBAPI(baseURL, dbname)
        self.wmarchiver = WMArchiver(self.config.ArchiveDataReporter.WMArchiverURL)

    def algorithm(self, parameters):
        """
        Poll couch for FWJR docs in 'ready' archive status, push them to
        WMArchive and mark each successfully uploaded doc.

        :param parameters: unused, required by the BaseWorkerThread interface
        """
        try:
            logging.info("Getting not archived data info from FWRJ db...")
            data = self.fwjrAPI.getFWJRByArchiveStatus('ready')['rows']

            # TODO: switch to a bulk status update / bulk archive call
            jobIDs = []
            archiverDocs = []
            for job in data:
                doc = self.wmarchiver.createArchiverDoc(job["id"], job['doc']["fwjr"])
                archiverDocs.append(doc)
                jobIDs.append(job["id"])

            response = self.wmarchiver.archiveData(archiverDocs)

            # Partial success is not allowed: either all the inserts succeed or none do.
            if response[0]['status'] == "ok" and len(response[0]['ids']) == len(jobIDs):
                for docID in jobIDs:
                    self.fwjrAPI.updateArchiveUploadedStatus(docID)
        except Exception as ex:
            logging.error("Error occurred, will retry later:")
            logging.error(str(ex))
            # Lazy %-args: let logging interpolate only if the record is emitted.
            logging.error("Trace back: \n%s", traceback.format_exc())