Example #1
class ReproducibleSeedingTests(unittest.TestCase):
    def setUp(self):
        self.testInit = TestInitCouchApp("ReproducibleSeedingTest")
        self.testInit.setupCouch("seeding_config_cache", "GroupUser", "ConfigCache")
        self.database = Database(self.testInit.couchDbName, self.testInit.couchUrl)

        self.documentId = None

    def tearDown(self):
        self.testInit.tearDownCouch()
        return


    def testA(self):
        """instantiate"""

        document = Document()
        document[u'pset_tweak_details'] = {}
        document[u'pset_tweak_details'][u'process'] = {}
        document[u'pset_tweak_details'][u'process'][u'RandomNumberGeneratorService'] = {}
        document[u'pset_tweak_details'][u'process'][u'RandomNumberGeneratorService'][u'seed1'] = {}
        document[u'pset_tweak_details'][u'process'][u'RandomNumberGeneratorService'][u'seed2'] = {}
        document[u'pset_tweak_details'][u'process'][u'RandomNumberGeneratorService'][u'seed3'] = {}

        document = self.database.commitOne(document)[0]
        seeder = ReproducibleSeeding(CouchUrl = self.testInit.couchUrl,
                                     CouchDBName = self.testInit.couchDbName,
                                     ConfigCacheDoc = document[u'id'])

        job = Job("testjob")
        seeder(job)
        baggage = job.getBaggage()
        seed1 = getattr(baggage.process.RandomNumberGeneratorService, "seed1", None)
        self.assertIsNotNone(seed1)
Example #2
class CouchSink(object):
    """
    Alert sink for pushing alerts to a couch database.

    """

    def __init__(self, config):
        self.config = config
        logging.info("Instantiating ...")
        # if the configured database does not exist, create it
        server = CouchServer(self.config.url)
        databases = server.listDatabases()
        if self.config.database not in databases:
            logging.warning(
                "'%s' database does not exist on %s, creating it ..." % (self.config.database, self.config.url)
            )
            server.createDatabase(self.config.database)
            logging.warning("Created.")
        logging.info("'%s' database exists on %s" % (self.config.database, self.config.url))
        self.database = Database(self.config.database, self.config.url)
        logging.info("Initialized.")

    def send(self, alerts):
        """
        Handle list of alerts.

        """
        retVals = []
        for a in alerts:
            doc = Document(None, a)
            retVal = self.database.commitOne(doc)
            retVals.append(retVal)
        logging.debug("Stored %s alerts to CouchDB, retVals: %s" % (len(alerts), retVals))
        return retVals
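
For context, a minimal usage sketch of CouchSink (a sketch under assumptions: _SinkConfig is a hypothetical stand-in for the real configuration object, exposing only the url and database attributes the class reads, and the alert dicts are illustrative):

# Hypothetical usage sketch of CouchSink; _SinkConfig is a stand-in for the
# real configuration object and only carries the attributes read above.
class _SinkConfig(object):
    url = "http://localhost:5984"       # assumed CouchDB endpoint
    database = "alerts_db"              # assumed database name

sink = CouchSink(_SinkConfig())
# alerts are passed as plain dicts in this sketch
retVals = sink.send([{"Type": "DiskFull", "Source": "AgentX", "Level": 10}])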
Example #3
class CouchSink(object):
    """
    Alert sink for pushing alerts to a couch database.

    """
    def __init__(self, config):
        self.config = config
        logging.info("Instantiating ...")
        # if the configured database does not exist, create it
        server = CouchServer(self.config.url)
        databases = server.listDatabases()
        if self.config.database not in databases:
            logging.warning(
                "'%s' database does not exist on %s, creating it ..." %
                (self.config.database, self.config.url))
            server.createDatabase(self.config.database)
            logging.warning("Created.")
        logging.info("'%s' database exists on %s" %
                     (self.config.database, self.config.url))
        self.database = Database(self.config.database, self.config.url)
        logging.info("Initialized.")

    def send(self, alerts):
        """
        Handle list of alerts.

        """
        retVals = []
        for a in alerts:
            doc = Document(None, a)
            retVal = self.database.commitOne(doc)
            retVals.append(retVal)
        logging.debug("Stored %s alerts to CouchDB, retVals: %s" %
                      (len(alerts), retVals))
        return retVals
Example #4
class CouchSink(object):
    """
    Alert sink for pushing alerts to a couch database.
    
    """

    def __init__(self, config):
        self.config = config
        # if the configured database does not exist, create it
        server = CouchServer(self.config.url)
        databases = server.listDatabases()
        if self.config.database not in databases:
            server.createDatabase(self.config.database)
        self.database = Database(self.config.database, self.config.url)
        logging.debug("%s initialized." % self.__class__.__name__)

    def send(self, alerts):
        """
        Handle list of alerts.
        
        """
        retVals = []
        for a in alerts:
            doc = Document(None, a)
            retVal = self.database.commitOne(doc)
            retVals.append(retVal)
        logging.debug("%s stored alerts, retVals: %s" % (self.__class__.__name__, retVals))
        return retVals
Example #5
def update_software(config_file):
    """
    Retrieves CMSSW versions and ScramArchs from the CMS tag collector.

    """
    config = loadConfigurationFile(config_file)
    # source of the data
    tag_collector_url = config.views.data.tag_collector_url
    # store the data into CouchDB auxiliary database under "software" document
    couch_host = config.views.data.couch_host
    reqmgr_aux_db = config.views.data.couch_reqmgr_aux_db

    # get data from tag collector
    all_archs_and_versions = _get_all_scramarchs_and_versions(
        tag_collector_url)
    if not all_archs_and_versions:
        return

    # get data already stored in CouchDB
    couchdb = Database(dbname=reqmgr_aux_db, url=couch_host)
    try:
        sw_already_stored = couchdb.document("software")
        del sw_already_stored["_id"]
        del sw_already_stored["_rev"]
    except CouchNotFoundError:
        logging.error("Document id software, does not exist, creating it ...")
        doc = Document(id="software", inputDict=all_archs_and_versions)
        couchdb.commitOne(doc)
        return

    # compare the latest tag collector data with what is already stored;
    # sorted() gives an order-independent comparison (on dicts it compares the keys)
    if sorted(all_archs_and_versions) != sorted(sw_already_stored):
        logging.debug(
            "ScramArch/CMSSW releases changed, updating software document ...")
        doc = Document(id="software", inputDict=all_archs_and_versions)
        couchdb.commitOne(doc)
    """
Example #6
def update_software(config_file):
    """
    Retrieves CMSSW versions and ScramArchs from the CMS tag collector.
    
    """
    config = loadConfigurationFile(config_file)
    # source of the data
    tag_collector_url = config.views.data.tag_collector_url
    # store the data into CouchDB auxiliary database under "software" document
    couch_host = config.views.data.couch_host
    reqmgr_aux_db = config.views.data.couch_reqmgr_aux_db
    
    # get data from tag collector
    all_archs_and_versions = _get_all_scramarchs_and_versions(tag_collector_url)
    if not all_archs_and_versions:
        return
    
    # get data already stored in CouchDB    
    couchdb = Database(dbname=reqmgr_aux_db, url=couch_host)
    try:
        sw_already_stored = couchdb.document("software")
        del sw_already_stored["_id"]
        del sw_already_stored["_rev"]
    except CouchNotFoundError:
        logging.error("Document id software, does not exist, creating it ...")
        doc = Document(id="software", inputDict=all_archs_and_versions)
        couchdb.commitOne(doc)
        return
    
    # compare the latest tag collector data with what is already stored;
    # sorted() gives an order-independent comparison (on dicts it compares the keys)
    if sorted(all_archs_and_versions) != sorted(sw_already_stored):
        logging.debug("ScramArch/CMSSW releases changed, updating software document ...")
        doc = Document(id="software", inputDict=all_archs_and_versions)
        couchdb.commitOne(doc)
    """
Example #7
class CouchSink(object):
    """
    Alert sink for pushing alerts to a couch database.
    
    """     
    def __init__(self, config):
        self.config = config
        self.database = Database(self.config.database, self.config.url)
        
        
    def send(self, alerts):
        """
        Handle list of alerts.
        
        """
        retVals = []
        for a in alerts:
            doc = Document(None, a)
            retVal = self.database.commitOne(doc)
            retVals.append(retVal)
        return retVals
Example #8
File: API.py  Project: dmwm/DQIS
class TestDQISResult(unittest.TestCase):
    DB_NAME = 'dqis_test'
    DB_URL = 'localhost:5984'

    def setUp(self):        
        couch = CouchServer(dburl=self.DB_URL)
        if self.DB_NAME in couch.listDatabases():
            couch.deleteDatabase(self.DB_NAME)
        
        cdb = couch.connectDatabase(self.DB_NAME)

        #for dq_t in test_data.demo_data:
        #    cdb.queue(dq_t)
        
        cdb.commit()
        
        self.db = Database(dbname=self.DB_NAME)
        
    
    def test_init(self):        
        #self.assertEqual(1,2)
        pass

    def test_save_and_delete(self):
        #Should the document get a revision number after save?
        #A document cannot be saved and then deleted, because save() does not return a DQISResult object!

        #Test document saving
        document = {"_id": "abc", "test":"data"}
        r = API.DQISResult(dqis_db = self.db, dict = document)
        all_docs_count_before = len(self.db.allDocs()['rows'])
        r.save()
        all_docs_count_after_insert = len(self.db.allDocs()['rows'])        
        
        self.assertEqual(all_docs_count_before +1, all_docs_count_after_insert)
        
        
        #Test delete
        doc = self.db.document("abc")
        r = API.DQISResult(dict=doc, dqis_db = self.db)
        self.assertEqual(doc["test"], "data")
        r.delete()
        self.db.commitOne(r)
        all_docs_count_after_deleting = len(self.db.allDocs()['rows']) 
        self.assertEqual(all_docs_count_before, all_docs_count_after_deleting )
        
    def test_savable(self):
        #Does a missing ID have to raise an exception?
        rez = API.DQISResult(dict = {'_id': "123"})._require_savable()
        self.assertEqual(rez, None)
        self.assertRaises(DQISResultNotSavable, 
                    API.DQISResult(dict = {'id': "123"})._require_savable )
        self.assertRaises(DQISResultNotSavable, 
                    API.DQISResult(dict = {'abc': "123"})._require_savable )
        
    def test_find_id(self): #similar to test_savable
        id1 = API.DQISResult()._find_id()
        id2 = API.DQISResult(dict = {'id': "123"})._find_id()
        id3 = API.DQISResult(dict = {'_id': "abc"})._find_id()
        self.assertEqual(id1, "")
        self.assertEqual(id2, '123')
        self.assertEqual(id3, 'abc')
        
    def test_require_saveable(self):
        dr1 = API.DQISResult()._require_savable
        #dr2 = API.DQISResult(dict = {'_id': "123"})._require_savable
        self.assertRaises(DQISResultNotSavable, dr1)
        #self.assertEqual(None, dr2())
        
    def test_save_to_queue(self):
        r = DQISResult(dqis_db = Database(), dict = {"_id": "abc"})
        queue_size_before = len(r.dqis_db._queue)
        r.saveToQueue()
        queue_size_after = len(r.dqis_db._queue) 
        self.assertEqual(queue_size_before, 0)
        self.assertEqual(queue_size_after, 1)
        r.dqis_db._reset_queue()
        
        
    def test_require_db(self):
        f = DQISResult()._require_db_connection
        self.assertRaises(DatabaseNotSetException, f)  
        
        f = DQISResult(dqis_db = "dqis_db")._require_db_connection
        self.assertRaises(DatabaseNotSetException, f)  
        
        f = DQISResult(dqis_db = Database())._require_db_connection
        self.assertEqual(None, f())



    def test_get_document(self):
        doc_id = '100215-0-38bc1d29bd22844103e86f9a000500e2' 
        r = API.DQISResult(API.Database(dbname="dqis"))
        r['id'] = doc_id
        doc = r.get_document()
        self.assertEqual(doc.run, 100215)
        doc_id = '' 
        r = DQISResult(Database(dbname="dqis"))
        r['id'] = doc_id
        fdoc = r.get_document 
        fdoc()
        self.assertRaises(DQISResultNotSavable, fdoc) # because get and s
Example #9
class ContinuousSummaryHistogramTest(unittest.TestCase):

    def setUp(self):
        """
        _setUp_

        Setup a couch database for testing
        of produced JSON
        """

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setupCouch("histogram_dump_t")
        random.seed()
        self.histogramDB = Database(dbname = "histogram_dump_t")

    def tearDown(self):
        """
        _tearDown_

        Clean the couch
        """
        self.testInit.tearDownCouch()

    def buildRandomNumberList(self, n, distribution = "normalvariate", **kwargs):
        """
        _buildRandomNumberList_

        Builds a list with n pseudorandomly distributed
        numbers according to some given distribution
        """
        numberList = []
        if not kwargs:
            kwargs = {"mu" : 0, "sigma" : 1}
        for _ in range(n):
            generator = getattr(random, distribution)
            numberList.append(generator(**kwargs))

        return numberList

    def testA_BasicTest(self):
        """
        _testA_BasicTest_

        Build a histogram from a set of normally
        distributed pseudorandom numbers. Check
        that the statistic properties
        in the histogram are accurate to some degree,
        that the histogram binning is done right and
        that this can become a document and be uploaded to couch
        """
        inputData = self.buildRandomNumberList(1000)

        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')

        # Populate the histogram
        for point in inputData:
            histogram.addPoint(point)

        # Get the JSON
        jsonHistogram = histogram.toJSON()

        # Check the histogram core data
        self.assertEqual(jsonHistogram["title"], "TestHisto")
        self.assertEqual(jsonHistogram["xLabel"], "MyLabel")
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 16)
        self.assertTrue(jsonHistogram["continuous"])

        # Check the internal data
        self.assertEqual(jsonHistogram["internalData"]["yLabel"], "SomeoneElsesLabel")
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1000)

        # Try to commit it to couch
        jsonHistogram["_id"] = jsonHistogram["title"]
        self.histogramDB.commitOne(jsonHistogram)

        storedJSON = self.histogramDB.document("TestHisto")
        self.assertEqual(len(storedJSON["data"]), 16)

        return

    def testB_extremeData(self):
        """
        _testB_extremeData_

        Put extreme points in the data and try to build a histogram.
        Check that it can process all this correctly
        """

        # First no data
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        jsonHistogram = histogram.toJSON()
        self.assertEqual(jsonHistogram["title"], "TestHisto")
        self.assertEqual(jsonHistogram["xLabel"], "MyLabel")
        self.assertEqual(jsonHistogram["average"], 0.0)
        self.assertEqual(jsonHistogram["stdDev"], 0.0)
        self.assertEqual(len(jsonHistogram["data"]), 0)

        # Data with NaNs and Infs
        inputData = self.buildRandomNumberList(100)
        inputData.append(float('NaN'))
        inputData.append(float('Inf'))
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 7)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 100)

        # One single point, P5
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        histogram.addPoint(5)
        jsonHistogram = histogram.toJSON()
        self.assertEqual(jsonHistogram["average"], 5.0)
        self.assertEqual(jsonHistogram["stdDev"], 0.0)
        self.assertEqual(len(jsonHistogram["data"]), 1)
        self.assertEqual(jsonHistogram["data"]["5.0,5.0"], 1)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1)

        # Test that toJSON is idempotent
        inputData = self.buildRandomNumberList(100)
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        oldData = jsonHistogram["data"]
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 7)
        self.assertEqual(jsonHistogram["data"], oldData)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 100)

        return

    def testC_compactHistogram(self):
        """
        _testC_compactHistogram_

        Check that we can create smaller histograms objects
        by chopping outliers and dropping the data all together
        """

        # Input normally distributed data and chop anything above 1 stdev (32% of data)
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel',
                                               dropOutliers = True, sigmaLimit = 1)
        inputData = self.buildRandomNumberList(1000)
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 16)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1000)
        pointsInHistogram = sum(viewvalues(jsonHistogram["data"]))

        # With high probability we must have chopped at least one point
        self.assertTrue(pointsInHistogram < 1000)
        self.assertAlmostEqual(pointsInHistogram / 1000.0, 0.68, places = 1)

        # Create a histogram without histogram data
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel',
                                               storeHistogram = False)
        inputData = self.buildRandomNumberList(1000)
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 0)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1000)

        return
Example #10
class database:
    logger = logfactory

    class DatabaseNotFoundException(Exception):
        def __init__(self,  db=''):
            self.db = str(db)
            database.logger.error('Database "%s" was not found.' % (self.db), level='critical')

        def __str__(self):
            return 'Error: Database "%s" was not found.' % (self.db)

    class DatabaseAccessError(Exception):
        def __init__(self,  db=''):
            self.db = str(db)
            database.logger.error('Could not access database "%s".' % (self.db), level='critical')

        def __str__(self):
            return 'Error: Could not access database "%s".' % (self.db)

    class DocumentNotFoundException(Exception):
        def __init__(self,  name=''):
            self.name = name
            database.logger.error('Document "%s" was not found.' % (self.name))

        def __str__(self):
            return 'Error: Document "%s" was not found.' % (self.name)

    class MapReduceSyntaxError(Exception):
        def __init__(self,  query=''):
            self.query = query
            database.logger.error('Invalid query <%s>' % (self.query))

        def __str__(self):
            return 'Error: Invalid query "' + self.query + '"'

    class InvalidOperatorError(Exception):
        def __init__(self,  op=''):
            self.op = str(op)
        def __str__(self):
            return 'Error: Operator "' + self.op + '" is invalid.'
    class InvalidParameterError(Exception):
        def __init__(self,  param=''):
            self.param = str(param)
        def __str__(self):
            return 'Error: Invalid Parameter: ' + self.param

    cache_dictionary = defaultdict(lambda: None)

    def __init__(self,  db_name='',url=None, cache=False):
        host = os.environ['HOSTNAME']
        if url is None:
            url = locator().dbLocation()
        #self.logger.log('I chose the url %s'%(url))
        if not db_name:
            raise self.DatabaseNotFoundException(db_name)
        self.db_name = db_name
        self.cache = cache
        if self.db_name in ['campaigns','chained_campaigns']:
            ## force cache for those.
            self.cache=True

        try:    
            self.db = Database(db_name, url=url)
            #            self.db = Database(db_name, url='http://preptest.cern.ch:5984/')
            #            self.db = Database(db_name) # for using private DB @localhost:5984
        except ValueError as ex:
            raise self.DatabaseAccessError(db_name)
            
        self.allowed_operators = ['<=',  '<',  '>=',  '>',  '==',  '~=']

    def __is_number(self, s):
        try:
            float(s)
            return True
        except ValueError:
            return False
       
    def get(self,  prepid=''):
        if self.cache:
            result = self.__get_from_cache(prepid)
            if result: return result

        self.logger.log('Looking for document "%s" in "%s"...' % (prepid,self.db_name))
        try:
            doc = self.db.document(id=prepid)
            if self.cache:
                self.__save_to_cache( prepid, doc)
            return doc
        except Exception as ex:
            self.logger.error('Document "%s" was not found. Reason: %s' % (prepid, ex))
            return {}

    def __save_to_cache(self, key, value):
        from tools.locker import locker
        with locker.lock(key):
            self.cache_dictionary[key]=value

    def __get_from_cache(self, key):
        from tools.locker import locker
        with locker.lock(key):
            return self.cache_dictionary[key]

    def __document_exists(self,  doc):
        if not doc:
            self.logger.error('Trying to locate empty string.', level='warning')
            return False
        if '_id' in doc:
            id = doc['_id']
        elif 'prepid' in doc:
            id = doc['prepid']
        else:
            self.logger.error('Document does not have an "_id" or "prepid" parameter.', level='critical')
            return False
        return self.__id_exists(prepid=id)

    def document_exists(self, prepid=''):
        self.logger.log('Checking existence of document "%s" in "%s"...' % (prepid, self.db_name))
        return self.__id_exists(prepid)
    
    def __id_exists(self,  prepid=''):
        try:
            if self.cache and self.__get_from_cache(prepid) or self.db.documentExists(id=prepid):
                return True
            self.logger.error('Document "%s" does not exist.' % (prepid))
            return False  
        except CouchError as ex:
            self.logger.error('Document "%s" was not found on CouchError Reason: %s trying a second time with a time out' % (prepid, ex))
            time.sleep(0.5)
            return self.__id_exists(prepid)
        except Exception as ex:
            self.logger.error('Document "%s" was not found. Reason: %s' % (prepid, ex))
            return False
    
    def delete(self, prepid=''):
        if not prepid:
            return False
        if not self.__id_exists(prepid):
            return False

        self.logger.log('Trying to delete document "%s"...' % (prepid))
        try:
            self.db.delete_doc(id=prepid)
            if self.cache:
                self.__save_to_cache(prepid, None)

            return True
        except Exception as ex:
            self.logger.error('Could not delete document: %s . Reason: %s ' % (prepid, ex))
            return False            

    def update(self,  doc={}):
        if '_id' in doc:
            self.logger.log('Updating document "%s" in "%s"' % (doc['_id'],self.db_name))
        if self.__document_exists(doc):
            if self.cache:
                ##JR the revision in the cache is not the one in the DB at this point
                # will be retaken at next get
                self.__save_to_cache(doc['_id'], None)
            return self.save(doc)
        self.logger.error('Failed to update document: %s' % (json.dumps(doc)))         
        return False
        
    def update_all(self,  docs=[]):
        if not docs:
            return False
            
        for doc in docs:
            if self.__document_exists(doc):
                self.db.queue(doc)
        try:
            self.db.commit()
            return True
        except Exception as ex:
            self.logger.error('Could not commit changes to database. Reason: %s' % (ex))
            return False        
        
    def get_all(self, page_num=-1): 
        try:
            limit, skip = self.__pagify(page_num)
            if limit >= 0 and skip >= 0: 
                result = self.db.loadView(self.db_name, "all", options={'limit':limit,'skip':skip, 'include_docs':True})['rows']
                res = map(lambda r : r['doc'], result)
                return res
            result = self.db.loadView(self.db_name, "all",options={'include_docs':True})['rows']
            res = map(lambda r : r['doc'], result)
            return res
        except Exception as ex:
            self.logger.error('Could not access view. Reason: %s' % (ex))
            return []

    
    def query(self,  query='', page_num=0):
        if not query:
            result = self.get_all(page_num)
            #res =  map(lambda r : r['doc'], result)
            return result
        try:
            result = self.__query(query, page=page_num)
            #res =  map(lambda r : r['doc'], result)
            return result
        except Exception as ex:
            self.logger.error('Could not load view for query: <%s> . Reason: %s' % (query, ex))
            return []

    def unique_res(self,query_result):
        docids = map(lambda doc : doc['_id'] , query_result)
        docids_s = list(set(docids))
        if len(docids) != len(docids_s):
            docids_s = []
            return_dict= copy.deepcopy( query_result )
            for doc in query_result:
                if not doc['_id'] in docids_s:
                    docids_s.append(doc['_id'])
                else:
                    return_dict.remove(doc)		
            return return_dict
        return query_result

    def queries( self, query_list):
        ## page_num does not matter here
        if not len(query_list):
            return self.get_all(page_num=-1)
        try:

            results_list=[]
            ## run each query separately and keep only the docs present in every result (intersection)
            for (i,query_item) in enumerate(query_list):
                res = self.query(query_item, page_num=-1)
                query_result = self.unique_res( res )
                if i!=0:
                    ## keep only the docs already in the intersection
                    id_list = map(lambda doc : doc['_id'], results_list)
                    results_list = filter(lambda doc : doc['_id'] in id_list, query_result)
                else:
                    results_list= query_result
            return results_list
        except Exception as ex:
            self.logger.error('Could not load view for queries: <%s> . Reason: %s' % ('<br>'.join(query_list), ex))
            return []

    def __extract_operators(self,  query=''):

        if not query:
            self.logger.error('Empty query', level='warning')
            return ()
        clean = []
        tokens = []
        for op in self.allowed_operators:
            if op in query:
                tokens = query.rsplit(op)
                tokens.insert(1,  op)
            else:
                continue
            for tok in tokens:
                if len(tok) < 1:
                    continue
                clean.append(tok.strip().strip('"'))
            if len(clean) != 3:
                raise self.MapReduceSyntaxError(query)
            #if clean[0] not in self.request and clean[1] not in self.campaign:
            #    raise self.IllegalParameterError(clean[0])
            return clean
        raise self.MapReduceSyntaxError(query)
    
    def __pagify(self, page_num=0, limit=20):
        if page_num < 0:
            return -1,0
        skip = limit*page_num
        return limit, skip      
    
    def __execute_query(self, tokenized_query='', page=-1, limit=20):
        tokens = []
        try:
            tokens = self.__extract_operators(tokenized_query)
        except Exception as ex:
            self.logger.error('Could not parse query. Reason: %s' % (ex))
            return []
        if tokens:
            view_name, view_opts = self.__build_query(tokens)
            if not view_name or not view_opts:
                return []
            if page > -1:
                view_opts['limit'] = limit
                view_opts['skip'] = page*limit
            view_opts['include_docs'] = True
            result = self.db.loadView(self.db_name, view_name, options=view_opts)['rows']
            res = map(lambda r: r['doc'], result)
            return res
        else:
            return []
    
    def raw_query(self,  view_name,  options={}):
        self.logger.error('Executing raw query to the database. Accessed view: %s' % (view_name), level='warning') 
        return self.db.loadView(self.db_name,  view_name,  options)['rows']
                
    def __get_op(self, oper):
        if oper == '>':
            return lambda x,y: x > y
        elif oper == '>=':
            return lambda x,y: x >= y
        elif oper == '<':
            return lambda x,y: x < y
        elif oper == '<=':
            return lambda x,y: x <= y
        elif oper == '==':
            return lambda x,y: x == y       
        else:
            return None     
        
    def __filter(self, tokenized_query=[], view_results=[]):
        if len(tokenized_query) != 3:
            return view_results
        prn = tokenized_query[0]
        op = tokenized_query[1]
        if self.__is_number(tokenized_query[2]):
            val = float(tokenized_query[2])
        else:
            val = tokenized_query[2]
        f = self.__get_op(op)
        return filter(lambda x: f(x[prn],val), view_results)    

    def __query(self, query='', page=0, limit=20):
        t_par = []
        results = []
        # comma-separated multi-clause queries are disabled; treat the whole
        # string as a single clause
        if not t_par:
            t_par = [query]
        if len(t_par) == 1:          
            return self.__execute_query(t_par[0], page, limit)#[page*limit:page*limit+limit]
        elif len(t_par) == 0:
            return []

        #temp = self.__execute_query(t_par[0])#[page*limit:page*limit+limit]
        res = self.__execute_query(t_par[0])
        #res = map(lambda x: x['value'], temp) 
        if len(res) == 0:
            return []
        for i in range(1,len(t_par)):
            tq = self.__extract_operators(t_par[i])
            res = self.__filter(tq, res)
        #return map(lambda x: {'value':x},res[page*limit:page*limit+20])
        return res[page*limit:page*limit+20]
                    
    def __build_query(self,tokens=[]):
        if not tokens:
            return None,None
        if len(tokens) != 3:
            raise self.MapReduceSyntaxError(tokens)
        param = tokens[0]
        op = tokens[1]     
        kval = tokens[2]
        try:
            view_opts = self.__build_options(op, kval)
        except Exception as ex:
            self.logger.error('Value types are not compatible with operator %s value %s Error: %s' % (op, kval, str(ex))) 
            return None,None
        return param, view_opts
    
    def __build_options(self,op, val):
        def is_number(s):
            try:
                float(s)
                return True
            except ValueError:
                return False
        
        # options dictionary
        opts = {} 
        
        # default the composite key search
        #if '[' in val and ']' in val:
        if val.startswith('[') and val.endswith(']'):
            if op == '==':
                try:
                    e = ast.literal_eval(val)
                    opts['key'] = e
                except (ValueError, SyntaxError):
                    opts['key'] = val
            return opts
        
        # handle alphanumeric key ranges
        num_flag = False
        if is_number(val):
            num_flag = True
            kval = float(val)
        else:
            kval = val.decode('ascii')
        if '>' in op:
            if '=' in op:
                opts['startkey']=kval
            else:
                if num_flag:
                    opts['startkey']=kval+1
                else:
                    opts['startkey']=kval
            if num_flag:
                opts['endkey']=99999999 # assume its numeric
            else:
                opts['endkey']=kval+u'\u9999'
        elif '<' in op:
            if '=' in op:
                opts['endkey']=kval
            else:
                if num_flag:
                    opts['endkey']=kval-1
                else:
                    opts['endkey']=kval
            if num_flag:
                opts['startkey']=-99999999
            else:
                opts['startkey']=''
                
        elif '==' == op:
            opts['key']=kval
        elif '~=' == op:
            if kval[-1] == '*':
                opts['startkey']=kval[:len(kval)-1]
                opts['endkey']=kval[:len(kval)-1]+u'\u9999'#'99999999'#'\u9999'
        return opts
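
    # Worked examples (hypothetical field names and values): the query
    # 'prepid>=Summer12' tokenizes to ['prepid', '>=', 'Summer12'] and builds
    # the view options {'startkey': u'Summer12', 'endkey': u'Summer12\u9999'},
    # while 'events>100' builds {'startkey': 101.0, 'endkey': 99999999}.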
            
  
    def save_all(self,  docs=[]):
        if not docs:
            return False
        for doc in docs:
            self.db.queue(doc)
        try:
            self.db.commit()
            return True
        except Exception as ex:
            self.logger.error('Could not commit changes to database. Reason: %s' % (ex)) 
            return False

    def save(self, doc={}):
        if not doc:
            self.logger.error('Tried to save empty document.', level='warning')
            return False


        # TODO: Check if an object exists in the database and fail.

        #if '_id' in doc:
        #    self.logger.log('Using user-defined id: %s' % (doc['_id']))
        #if self.__document_exists(doc):
        #    self.logger.error('Failed to update document: %s' % (json.dumps(doc)))
        #    return False

        try:
            #self.logger.error('Document is %s %s'%(doc['_id'],doc))
            #self.logger.error(self.db.commitOne(doc))
            ## commitOne's per-document status is inspected so that save/update can return True/False
            saved = self.db.commitOne(doc)
            if 'error' in saved[0]:
                self.logger.error('Commit One says : %s'%(saved))
                return False
            else:
                return True
        except Exception as ex:
            self.logger.error('Could not commit changes to database. Reason: %s' % (ex))
            return False

    def count(self):
        try:
            return len(self.db.allDocs()['rows'])
        except Exception as ex:
            self.logger.error('Could not count documents in database. Reason: %s' % (ex))
            return -1 
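
A hedged usage sketch of this wrapper (assumptions: a CouchDB reachable at the given url, a 'campaigns' database whose design document provides the 'all' view used by get_all, and illustrative prepid values):

# Hypothetical usage of the database wrapper defined above.
db = database('campaigns', url='http://localhost:5984/')
doc = db.get('Summer12-Campaign-00001')           # single fetch, cached for campaigns
docs = db.query('prepid>=Summer12', page_num=0)   # operator queries become view key ranges
exists = db.document_exists('Summer12-Campaign-00001')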
Example #11
def swapLocations(options):
    #Initialize stuff
    phedexAPI = PhEDEx({'cachepath' : options.cachepath})
    acdcCouch = Database('wmagent_acdc', options.acdcUrl)

    #Let's get the IDs of the ACDC documents for the task/request/group/user
    array = [options.group, options.user, options.request, options.task]
    result = acdcCouch.loadView('ACDC', 'owner_coll_fileset_docs', {'reduce' : False}, [array])

    documentsIDs = [x['id'] for x in result['rows']]

    #Load the map file saying what we want to change of location
    mapFile = open(options.map, 'r')
    locationMap = json.load(mapFile)
    mapFile.close()

    #Go through the documents
    for docID in documentsIDs:
        doc = acdcCouch.document(docID)

        #Are we going to change this doc? Better back it up
        if options.change:
            backupFile = open(os.path.join(options.backup, "%s.bkp" % doc["_id"]), 'w')
            json.dump(doc, backupFile)
            backupFile.close()

        #Go through the files
        files = doc["files"]
        for inputFile in files:

            #Use PhEDEx API to get site based on the SE
            #Then map that to the desired target
            se = files[inputFile]["locations"][0]
            siteLocation = phedexAPI.getBestNodeName(se)
            targetLocation = locationMap.get(siteLocation, siteLocation)

            if siteLocation == targetLocation:
                #Nothing to do with this one, move on
                continue

            if not options.change:
                #No changes, then give the commands to move the files
                #Get the PFN for both the current location and the target location
                pfnDict = phedexAPI.getPFN(siteLocation, inputFile)
                inputPfn = pfnDict[(siteLocation, inputFile)]
                pfnDict = phedexAPI.getPFN(targetLocation, inputFile)
                targetPfn = pfnDict[(targetLocation, inputFile)]

                #Print it to stdout
                print "lcg-cp -D srmv2 -b %s %s" % (inputPfn, targetPfn)

            else:
                #This is changes time, let's move the stuff
                targetSE = phedexAPI.getNodeSE(targetLocation)
                files[inputFile]["locations"][0] = targetSE
                print "Changing location of %s from %s to %s" % (inputFile, se, targetSE)

        #If specified, commit the changes
        if options.change:
            acdcCouch.commitOne(doc)

    return 0
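
swapLocations reads acdcUrl, cachepath, group, user, request, task, map, backup and change off its options argument. A hedged driver sketch, assuming optparse (standard in scripts of this era) and illustrative defaults:

# Hypothetical command-line driver for swapLocations; option names mirror the
# attributes the function reads above, defaults are illustrative only.
from optparse import OptionParser

parser = OptionParser()
parser.add_option('--acdcUrl', dest='acdcUrl', default='http://localhost:5984')
parser.add_option('--cachepath', dest='cachepath', default='/tmp/phedex_cache')
parser.add_option('--group', dest='group')
parser.add_option('--user', dest='user')
parser.add_option('--request', dest='request')
parser.add_option('--task', dest='task')
parser.add_option('--map', dest='map', help='JSON file mapping site -> target site')
parser.add_option('--backup', dest='backup', default='.')
parser.add_option('--change', dest='change', action='store_true', default=False)

options, _ = parser.parse_args()
swapLocations(options)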
Example #12
class DiscreteSummaryHistogramTest(unittest.TestCase):

    def setUp(self):
        """
        _setUp_

        Setup a couch database for testing
        of produced JSON
        """

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setupCouch("histogram_dump_t")
        self.histogramDB = Database(dbname = "histogram_dump_t")

    def tearDown(self):
        """
        _tearDown_

        Clean the couch
        """
        self.testInit.tearDownCouch()

    def testA_BasicTest(self):
        """
        _testA_BasicTest_

        Build a histogram from a set of discrete data. Check
        that the statistic properties in the histogram are accurate,
        and that this can become a document and be uploaded to couch
        """
        # Try and empty one
        histogram = DiscreteSummaryHistogram('SomeTitle', 'Categories')
        histogramJSON = histogram.toJSON()

        self.assertEqual(histogramJSON["title"], "SomeTitle")
        self.assertEqual(histogramJSON["xLabel"], "Categories")
        self.assertFalse(histogramJSON["continuous"])
        self.assertEqual(len(histogramJSON["data"]), 0)
        self.assertEqual(histogramJSON["average"], {})
        self.assertEqual(histogramJSON["stdDev"], {})

        histogram = DiscreteSummaryHistogram('SomeTitle', 'Categories')

        for _ in range(5):
            histogram.addPoint("CategoryA", "FeatureA")
            histogram.addPoint("CategoryB", "FeatureB")

        for _ in range(17):
            histogram.addPoint("CategoryA", "FeatureB")
            histogram.addPoint("CategoryC", "FeatureB")

        for _ in range(3):
            histogram.addPoint("CategoryC", "FeatureA")

        jsonHistogram = histogram.toJSON()

        # Average/stdDev per feature:
        # FeatureA: avg = 2.7 stdev = 2.05
        # FeatureB: avg = 13 stdev = 5.66
        self.assertAlmostEqual(jsonHistogram["average"]["FeatureA"], 2.7, places = 1)
        self.assertAlmostEqual(jsonHistogram["average"]["FeatureB"], 13, places = 1)
        self.assertAlmostEqual(jsonHistogram["stdDev"]["FeatureA"], 2.05, places = 1)
        self.assertAlmostEqual(jsonHistogram["stdDev"]["FeatureB"], 5.66, places = 1)
        self.assertEqual(jsonHistogram["data"]["CategoryA"]["FeatureA"], 5)
        self.assertEqual(jsonHistogram["data"]["CategoryA"]["FeatureB"], 17)
        self.assertEqual(jsonHistogram["data"]["CategoryB"]["FeatureA"], 0)
        self.assertEqual(jsonHistogram["data"]["CategoryB"]["FeatureB"], 5)
        self.assertEqual(jsonHistogram["data"]["CategoryC"]["FeatureA"], 3)
        self.assertEqual(jsonHistogram["data"]["CategoryC"]["FeatureB"], 17)

        # Test couch
        # Try to commit it to couch
        jsonHistogram["_id"] = jsonHistogram["title"]
        self.histogramDB.commitOne(jsonHistogram)

        storedJSON = self.histogramDB.document("SomeTitle")
        self.assertEqual(len(storedJSON["data"]), 3)

        return
Example #13
class DiscreteSummaryHistogramTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Setup a couch database for testing
        of produced JSON
        """

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setupCouch("histogram_dump_t")
        self.histogramDB = Database(dbname="histogram_dump_t")

    def tearDown(self):
        """
        _tearDown_

        Clean the couch
        """
        self.testInit.tearDownCouch()

    def testA_BasicTest(self):
        """
        _testA_BasicTest_

        Build a histogram from a set of discrete data. Check
        that the statistic properties in the histogram are accurate,
        and that this can become a document and be uploaded to couch
        """
        # Try and empty one
        histogram = DiscreteSummaryHistogram('SomeTitle', 'Categories')
        histogramJSON = histogram.toJSON()

        self.assertEqual(histogramJSON["title"], "SomeTitle")
        self.assertEqual(histogramJSON["xLabel"], "Categories")
        self.assertFalse(histogramJSON["continuous"])
        self.assertEqual(len(histogramJSON["data"]), 0)
        self.assertEqual(histogramJSON["average"], {})
        self.assertEqual(histogramJSON["stdDev"], {})

        histogram = DiscreteSummaryHistogram('SomeTitle', 'Categories')

        for _ in range(5):
            histogram.addPoint("CategoryA", "FeatureA")
            histogram.addPoint("CategoryB", "FeatureB")

        for _ in range(17):
            histogram.addPoint("CategoryA", "FeatureB")
            histogram.addPoint("CategoryC", "FeatureB")

        for _ in range(3):
            histogram.addPoint("CategoryC", "FeatureA")

        jsonHistogram = histogram.toJSON()

        # Average/stdDev per feature:
        # FeatureA: avg = 2.7 stdev = 2.05
        # FeatureB: avg = 13 stdev = 5.66
        self.assertAlmostEqual(jsonHistogram["average"]["FeatureA"],
                               2.7,
                               places=1)
        self.assertAlmostEqual(jsonHistogram["average"]["FeatureB"],
                               13,
                               places=1)
        self.assertAlmostEqual(jsonHistogram["stdDev"]["FeatureA"],
                               2.05,
                               places=1)
        self.assertAlmostEqual(jsonHistogram["stdDev"]["FeatureB"],
                               5.66,
                               places=1)
        self.assertEqual(jsonHistogram["data"]["CategoryA"]["FeatureA"], 5)
        self.assertEqual(jsonHistogram["data"]["CategoryA"]["FeatureB"], 17)
        self.assertEqual(jsonHistogram["data"]["CategoryB"]["FeatureA"], 0)
        self.assertEqual(jsonHistogram["data"]["CategoryB"]["FeatureB"], 5)
        self.assertEqual(jsonHistogram["data"]["CategoryC"]["FeatureA"], 3)
        self.assertEqual(jsonHistogram["data"]["CategoryC"]["FeatureB"], 17)

        # Test couch
        # Try to commit it to couch
        jsonHistogram["_id"] = jsonHistogram["title"]
        self.histogramDB.commitOne(jsonHistogram)

        storedJSON = self.histogramDB.document("SomeTitle")
        self.assertEqual(len(storedJSON["data"]), 3)

        return
Example #14
class ContinuousSummaryHistogramTest(unittest.TestCase):

    def setUp(self):
        """
        _setUp_

        Setup a couch database for testing
        of produced JSON
        """

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setupCouch("histogram_dump_t")
        random.seed()
        self.histogramDB = Database(dbname = "histogram_dump_t")

    def tearDown(self):
        """
        _tearDown_

        Clean the couch
        """
        self.testInit.tearDownCouch()

    def buildRandomNumberList(self, n, distribution = "normalvariate", **kwargs):
        """
        _buildRandomNumberList_

        Builds a list with n pseudorandomly distributed
        numbers according to some given distribution
        """
        numberList = []
        if not kwargs:
            kwargs = {"mu" : 0, "sigma" : 1}
        for _ in range(n):
            generator = getattr(random, distribution)
            numberList.append(generator(**kwargs))

        return numberList

    def testA_BasicTest(self):
        """
        _testA_BasicTest_

        Build a histogram from a set of normally
        distributed pseudorandom numbers. Check
        that the statistic properties
        in the histogram are accurate to some degree,
        that the histogram binning is done right and
        that this can become a document and be uploaded to couch
        """
        inputData = self.buildRandomNumberList(1000)

        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')

        # Populate the histogram
        for point in inputData:
            histogram.addPoint(point)

        # Get the JSON
        jsonHistogram = histogram.toJSON()

        # Check the histogram core data
        self.assertEqual(jsonHistogram["title"], "TestHisto")
        self.assertEqual(jsonHistogram["xLabel"], "MyLabel")
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 16)
        self.assertTrue(jsonHistogram["continuous"])

        # Check the internal data
        self.assertEqual(jsonHistogram["internalData"]["yLabel"], "SomeoneElsesLabel")
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1000)

        # Try to commit it to couch
        jsonHistogram["_id"] = jsonHistogram["title"]
        self.histogramDB.commitOne(jsonHistogram)

        storedJSON = self.histogramDB.document("TestHisto")
        self.assertEqual(len(storedJSON["data"]), 16)

        return

    def testB_extremeData(self):
        """
        _testB_extremeData_

        Put extreme points in the data and try to build a histogram.
        Check that it can process all this correctly
        """

        # First no data
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        jsonHistogram = histogram.toJSON()
        self.assertEqual(jsonHistogram["title"], "TestHisto")
        self.assertEqual(jsonHistogram["xLabel"], "MyLabel")
        self.assertEqual(jsonHistogram["average"], 0.0)
        self.assertEqual(jsonHistogram["stdDev"], 0.0)
        self.assertEqual(len(jsonHistogram["data"]), 0)

        # Data with NaNs and Infs
        inputData = self.buildRandomNumberList(100)
        inputData.append(float('NaN'))
        inputData.append(float('Inf'))
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 7)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 100)

        # One single point, P5
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        histogram.addPoint(5)
        jsonHistogram = histogram.toJSON()
        self.assertEqual(jsonHistogram["average"], 5.0)
        self.assertEqual(jsonHistogram["stdDev"], 0.0)
        self.assertEqual(len(jsonHistogram["data"]), 1)
        self.assertEqual(jsonHistogram["data"]["5.0,5.0"], 1)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1)

        # Test that toJSON is idempotent
        inputData = self.buildRandomNumberList(100)
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        oldData = jsonHistogram["data"]
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 7)
        self.assertEqual(jsonHistogram["data"], oldData)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 100)

        return

    def testC_compactHistogram(self):
        """
        _testC_compactHistogram_

        Check that we can create smaller histograms objects
        by chopping outliers and dropping the data all together
        """

        # Input normally distributed data and chop anything above 1 stdev (32% of data)
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel',
                                               dropOutliers = True, sigmaLimit = 1)
        inputData = self.buildRandomNumberList(1000)
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 16)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1000)
        pointsInHistogram = sum(jsonHistogram["data"].values())

        # With high probability we must have chopped at least one point
        self.assertTrue(pointsInHistogram < 1000)
        self.assertAlmostEqual(pointsInHistogram / 1000.0, 0.68, places = 1)

        # Create a histogram without histogram data
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel',
                                               storeHistogram = False)
        inputData = self.buildRandomNumberList(1000)
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 0)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1000)

        return