class ReproducibleSeedingTests(unittest.TestCase):

    def setUp(self):
        self.testInit = TestInitCouchApp("ReproducibleSeedingTest")
        self.testInit.setupCouch("seeding_config_cache", "GroupUser", "ConfigCache")
        self.database = Database(self.testInit.couchDbName, self.testInit.couchUrl)
        self.documentId = None

    def tearDown(self):
        self.testInit.tearDownCouch()
        return

    def testA(self):
        """instantiate"""
        document = Document()
        document[u'pset_tweak_details'] = {}
        document[u'pset_tweak_details'][u'process'] = {}
        document[u'pset_tweak_details'][u'process'][u'RandomNumberGeneratorService'] = {}
        document[u'pset_tweak_details'][u'process'][u'RandomNumberGeneratorService'][u'seed1'] = {}
        document[u'pset_tweak_details'][u'process'][u'RandomNumberGeneratorService'][u'seed2'] = {}
        document[u'pset_tweak_details'][u'process'][u'RandomNumberGeneratorService'][u'seed3'] = {}
        document = self.database.commitOne(document)[0]
        seeder = ReproducibleSeeding(CouchUrl=self.testInit.couchUrl,
                                     CouchDBName=self.testInit.couchDbName,
                                     ConfigCacheDoc=document[u'id'])
        job = Job("testjob")
        seeder(job)
        baggage = job.getBaggage()
        seed1 = getattr(baggage.process.RandomNumberGeneratorService, "seed1", None)
        self.failUnless(seed1 != None)
class CouchSink(object):
    """
    Alert sink for pushing alerts to a couch database.
    """
    def __init__(self, config):
        self.config = config
        logging.info("Instantiating ...")
        # if the configured database does not exist, create it
        server = CouchServer(self.config.url)
        databases = server.listDatabases()
        if self.config.database not in databases:
            logging.warn("'%s' database does not exist on %s, creating it ..." %
                         (self.config.database, self.config.url))
            server.createDatabase(self.config.database)
            logging.warn("Created.")
        logging.info("'%s' database exists on %s" % (self.config.database, self.config.url))
        self.database = Database(self.config.database, self.config.url)
        logging.info("Initialized.")

    def send(self, alerts):
        """
        Handle list of alerts.
        """
        retVals = []
        for a in alerts:
            doc = Document(None, a)
            retVal = self.database.commitOne(doc)
            retVals.append(retVal)
        logging.debug("Stored %s alerts to CouchDB, retVals: %s" % (len(alerts), retVals))
        return retVals
class CouchSink(object):
    """
    Alert sink for pushing alerts to a couch database.
    """
    def __init__(self, config):
        self.config = config
        # if the configured database does not exist, create it
        server = CouchServer(self.config.url)
        databases = server.listDatabases()
        if self.config.database not in databases:
            server.createDatabase(self.config.database)
        self.database = Database(self.config.database, self.config.url)
        logging.debug("%s initialized." % self.__class__.__name__)

    def send(self, alerts):
        """
        Handle list of alerts.
        """
        retVals = []
        for a in alerts:
            doc = Document(None, a)
            retVal = self.database.commitOne(doc)
            retVals.append(retVal)
        logging.debug("%s stored alerts, retVals: %s" % (self.__class__.__name__, retVals))
        return retVals
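# A minimal usage sketch for the CouchSink variants above. The config object is an
# assumption: any attribute bag exposing `url` and `database` will do, so a stdlib
# SimpleNamespace stands in for the real configuration here; the URL and database
# name are illustrative only.
import logging
from types import SimpleNamespace

logging.basicConfig(level=logging.DEBUG)

config = SimpleNamespace(url="http://localhost:5984", database="alerts")
sink = CouchSink(config)

# Alerts are plain dictionaries; each one becomes a CouchDB document.
alerts = [{"Type": "WorkQueue", "Level": 10, "Details": {"msg": "queue is draining"}}]
print(sink.send(alerts))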
def update_software(config_file):
    """
    Retrieves CMSSW versions and ScramArchs from the CMS tag collector.
    """
    config = loadConfigurationFile(config_file)
    # source of the data
    tag_collector_url = config.views.data.tag_collector_url
    # store the data into the CouchDB auxiliary database under the "software" document
    couch_host = config.views.data.couch_host
    reqmgr_aux_db = config.views.data.couch_reqmgr_aux_db

    # get data from the tag collector
    all_archs_and_versions = _get_all_scramarchs_and_versions(tag_collector_url)
    if not all_archs_and_versions:
        return

    # get data already stored in CouchDB
    couchdb = Database(dbname=reqmgr_aux_db, url=couch_host)
    try:
        sw_already_stored = couchdb.document("software")
        del sw_already_stored["_id"]
        del sw_already_stored["_rev"]
    except CouchNotFoundError:
        logging.error("Document id 'software' does not exist, creating it ...")
        doc = Document(id="software", inputDict=all_archs_and_versions)
        couchdb.commitOne(doc)
        return

    # now compare the recent data from the tag collector with what is already stored;
    # sorting is necessary (note: sorted() on a dict yields keys only, so this detects
    # added/removed ScramArchs rather than changed release lists)
    if sorted(all_archs_and_versions) != sorted(sw_already_stored):
        logging.debug("ScramArch/CMSSW releases changed, updating software document ...")
        doc = Document(id="software", inputDict=all_archs_and_versions)
        couchdb.commitOne(doc)
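# A hypothetical sketch of the tag-collector helper used above, included only to
# show the expected return shape: a dict mapping ScramArch -> list of CMSSW releases,
# which is what the "software" document stores. The endpoint, the JSON layout and the
# field names ("architecture", "release_name") are assumptions made for illustration,
# not the real tag-collector API.
import json
import logging
from urllib.request import urlopen  # assumption: a Python 3 stdlib client is acceptable here


def _get_all_scramarchs_and_versions(url):
    result = {}
    try:
        with urlopen(url) as resp:  # hypothetical JSON endpoint
            payload = json.loads(resp.read().decode("utf-8"))
        for item in payload:  # assumed layout: list of {architecture, release_name} dicts
            arch = item["architecture"]
            release = item["release_name"]
            result.setdefault(arch, []).append(release)
    except Exception as ex:
        logging.error("Could not query tag collector %s: %s", url, ex)
    return result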
class CouchSink(object):
    """
    Alert sink for pushing alerts to a couch database.
    """
    def __init__(self, config):
        self.config = config
        self.database = Database(self.config.database, self.config.url)

    def send(self, alerts):
        """
        Handle list of alerts.
        """
        retVals = []
        for a in alerts:
            doc = Document(None, a)
            retVal = self.database.commitOne(doc)
            retVals.append(retVal)
        return retVals
class TestDQISResult(unittest.TestCase):

    DB_NAME = 'dqis_test'
    DB_URL = 'localhost:5984'

    def setUp(self):
        couch = CouchServer(dburl=self.DB_URL)
        if self.DB_NAME in couch.listDatabases():
            couch.deleteDatabase(self.DB_NAME)
        cdb = couch.connectDatabase(self.DB_NAME)
        #for dq_t in test_data.demo_data:
        #    cdb.queue(dq_t)
        cdb.commit()
        self.db = Database(dbname=self.DB_NAME)

    def test_init(self):
        #self.assertEqual(1, 2)
        pass

    def test_save_and_delete(self):
        # Should the document get a revision number after save?
        # A document cannot be saved and then deleted in one go, because save()
        # does not return a DQISResult object.
        # Tests document saving
        document = {"_id": "abc", "test": "data"}
        r = API.DQISResult(dqis_db=self.db, dict=document)
        all_docs_count_before = len(self.db.allDocs()['rows'])
        r.save()
        all_docs_count_after_insert = len(self.db.allDocs()['rows'])
        self.assertEqual(all_docs_count_before + 1, all_docs_count_after_insert)

        # Test delete
        doc = self.db.document("abc")
        r = API.DQISResult(dict=doc, dqis_db=self.db)
        self.assertEqual(doc["test"], "data")
        r.delete()
        self.db.commitOne(r)
        all_docs_count_after_deleting = len(self.db.allDocs()['rows'])
        self.assertEqual(all_docs_count_before, all_docs_count_after_deleting)

    def test_savable(self):
        # Only documents with a proper '_id' are savable; anything else must raise
        rez = API.DQISResult(dict={'_id': "123"})._require_savable()
        self.assertEqual(rez, None)
        self.assertRaises(DQISResultNotSavable,
                          API.DQISResult(dict={'id': "123"})._require_savable)
        self.assertRaises(DQISResultNotSavable,
                          API.DQISResult(dict={'abc': "123"})._require_savable)

    def test_find_id(self):
        # similar to test_savable
        self.assertEqual(DQISResult()._find_id(), "")
        self.assertEqual(DQISResult(dict={'id': "123"})._find_id(), "123")
        self.assertEqual(DQISResult(dict={'_id': "123"})._find_id(), "123")

    # NOTE: this second definition shadows the test_find_id above; only this one runs.
    def test_find_id(self):
        id1 = API.DQISResult()._find_id()
        id2 = API.DQISResult(dict={'id': "123"})._find_id()
        id3 = API.DQISResult(dict={'_id': "abc"})._find_id()
        self.assertEqual(id1, "")
        self.assertEqual(id2, '123')
        self.assertEqual(id3, 'abc')

    def test_require_saveable(self):
        dr1 = API.DQISResult()._require_savable
        #dr2 = API.DQISResult(dict={'_id': "123"})._require_savable
        self.assertRaises(DQISResultNotSavable, dr1)
        #self.assertEqual(None, dr2())

    def test_save_to_queue(self):
        r = DQISResult(dqis_db=Database(), dict={"_id": "abc"})
        queue_size_before = len(r.dqis_db._queue)
        r.saveToQueue()
        queue_size_after = len(r.dqis_db._queue)
        self.assertEqual(queue_size_before, 0)
        self.assertEqual(queue_size_after, 1)
        r.dqis_db._reset_queue()

    def test_require_db(self):
        f = DQISResult()._require_db_connection
        self.assertRaises(DatabaseNotSetException, f)
        f = DQISResult(dqis_db="dqis_db")._require_db_connection
        self.assertRaises(DatabaseNotSetException, f)
        f = DQISResult(dqis_db=Database())._require_db_connection
        self.assertEqual(None, f())

    def test_get_document(self):
        doc_id = '100215-0-38bc1d29bd22844103e86f9a000500e2'
        r = API.DQISResult(API.Database(dbname="dqis"))
        r['id'] = doc_id
        doc = r.get_document()
        self.assertEqual(doc.run, 100215)

        doc_id = ''
        r = DQISResult(Database(dbname="dqis"))
        r['id'] = doc_id
        fdoc = r.get_document
        fdoc()
        self.assertRaises(DQISResultNotSavable, fdoc)  # because get and s
class ContinuousSummaryHistogramTest(unittest.TestCase):

    def setUp(self):
        """
        _setUp_

        Setup a couch database for testing of produced JSON
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setupCouch("histogram_dump_t")
        random.seed()
        self.histogramDB = Database(dbname="histogram_dump_t")

    def tearDown(self):
        """
        _tearDown_

        Clean the couch
        """
        self.testInit.tearDownCouch()

    def buildRandomNumberList(self, n, distribution="normalvariate", **kwargs):
        """
        _buildRandomNumberList_

        Builds a list with n pseudorandomly distributed numbers
        according to some given distribution
        """
        numberList = []
        if not kwargs:
            kwargs = {"mu": 0, "sigma": 1}
        for _ in range(n):
            generator = getattr(random, distribution)
            numberList.append(generator(**kwargs))
        return numberList

    def testA_BasicTest(self):
        """
        _testA_BasicTest_

        Build a histogram from a set of uniformly distributed pseudorandom
        numbers. Check that the statistic properties in the histogram are
        accurate to some degree, that the histogram binning is done right
        and that this can become a document and be uploaded to couch
        """
        inputData = self.buildRandomNumberList(1000)

        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')

        # Populate the histogram
        for point in inputData:
            histogram.addPoint(point)

        # Get the JSON
        jsonHistogram = histogram.toJSON()

        # Check the histogram core data
        self.assertEqual(jsonHistogram["title"], "TestHisto")
        self.assertEqual(jsonHistogram["xLabel"], "MyLabel")
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places=0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places=0)
        self.assertEqual(len(jsonHistogram["data"]), 16)
        self.assertTrue(jsonHistogram["continuous"])

        # Check the internal data
        self.assertEqual(jsonHistogram["internalData"]["yLabel"], "SomeoneElsesLabel")
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1000)

        # Try to commit it to couch
        jsonHistogram["_id"] = jsonHistogram["title"]
        self.histogramDB.commitOne(jsonHistogram)

        storedJSON = self.histogramDB.document("TestHisto")
        self.assertEqual(len(storedJSON["data"]), 16)

        return

    def testB_extremeData(self):
        """
        _testB_extremeData_

        Put extreme points in the data and try to build a histogram.
        Check that it can process all this correctly
        """
        # First no data
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        jsonHistogram = histogram.toJSON()
        self.assertEqual(jsonHistogram["title"], "TestHisto")
        self.assertEqual(jsonHistogram["xLabel"], "MyLabel")
        self.assertEqual(jsonHistogram["average"], 0.0)
        self.assertEqual(jsonHistogram["stdDev"], 0.0)
        self.assertEqual(len(jsonHistogram["data"]), 0)

        # Data with NaNs and Infs
        inputData = self.buildRandomNumberList(100)
        inputData.append(float('NaN'))
        inputData.append(float('Inf'))
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places=0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places=0)
        self.assertEqual(len(jsonHistogram["data"]), 7)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 100)

        # One single point, P5
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        histogram.addPoint(5)
        jsonHistogram = histogram.toJSON()
        self.assertEqual(jsonHistogram["average"], 5.0)
        self.assertEqual(jsonHistogram["stdDev"], 0.0)
        self.assertEqual(len(jsonHistogram["data"]), 1)
        self.assertEqual(jsonHistogram["data"]["5.0,5.0"], 1)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1)

        # Test that toJSON is idempotent
        inputData = self.buildRandomNumberList(100)
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        oldData = jsonHistogram["data"]
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places=0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places=0)
        self.assertEqual(len(jsonHistogram["data"]), 7)
        self.assertEqual(jsonHistogram["data"], oldData)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 100)

        return

    def testC_compactHistogram(self):
        """
        _testC_compactHistogram_

        Check that we can create smaller histogram objects by chopping
        outliers and dropping the data altogether
        """
        # Input normally distributed data and chop anything above 1 stdev (32% of data)
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel',
                                               dropOutliers=True, sigmaLimit=1)
        inputData = self.buildRandomNumberList(1000)
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places=0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places=0)
        self.assertEqual(len(jsonHistogram["data"]), 16)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1000)
        pointsInHistogram = sum([x for x in viewvalues(jsonHistogram["data"])])

        # With high probability we must have chopped at least one point
        self.assertTrue(pointsInHistogram < 1000)
        self.assertAlmostEqual(pointsInHistogram / 1000.0, 0.68, places=1)

        # Create a histogram without histogram data
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel',
                                               storeHistogram=False)
        inputData = self.buildRandomNumberList(1000)
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places=0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places=0)
        self.assertEqual(len(jsonHistogram["data"]), 0)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1000)

        return
class database:

    logger = logfactory

    class DatabaseNotFoundException(Exception):
        def __init__(self, db=''):
            self.db = str(db)
            database.logger.error('Database "%s" was not found.' % (self.db), level='critical')

        def __str__(self):
            return 'Error: Database ' + self.db + ' was not found.'

    class DatabaseAccessError(Exception):
        def __init__(self, db=''):
            self.db = str(db)
            database.logger.error('Could not access database "%s".' % (self.db), level='critical')

        def __str__(self):
            return 'Error: Could not access database ' + self.db

    class DocumentNotFoundException(Exception):
        def __init__(self, name=''):
            self.name = name
            database.logger.error('Document "%s" was not found.' % (self.name))

        def __str__(self):
            return 'Error: Document ' + self.name + ' was not found.'

    class MapReduceSyntaxError(Exception):
        def __init__(self, query=''):
            self.query = query
            database.logger.error('Invalid query <%s>' % (self.query))

        def __str__(self):
            return 'Error: Invalid query "' + self.query + '"'

    class InvalidOperatorError(Exception):
        def __init__(self, op=''):
            self.op = str(op)

        def __str__(self):
            return 'Error: Operator "' + self.op + '" is invalid.'

    class InvalidParameterError(Exception):
        def __init__(self, param=''):
            self.param = str(param)

        def __str__(self):
            return 'Error: Invalid Parameter: ' + self.param

    cache_dictionary = defaultdict(lambda: None)

    def __init__(self, db_name='', url=None, cache=False):
        host = os.environ['HOSTNAME']
        if url == None:
            url = locator().dbLocation()
        #self.logger.log('I chose the url %s' % (url))
        if not db_name:
            raise self.DatabaseNotFoundException(db_name)
        self.db_name = db_name
        self.cache = cache
        if self.db_name in ['campaigns', 'chained_campaigns']:
            ## force cache for those
            self.cache = True
        try:
            self.db = Database(db_name, url=url)
            # self.db = Database(db_name, url='http://preptest.cern.ch:5984/')
            # self.db = Database(db_name)  # for using a private DB @localhost:5984
        except ValueError as ex:
            raise self.DatabaseAccessError(db_name)
        self.allowed_operators = ['<=', '<', '>=', '>', '==', '~=']

    def __is_number(self, s):
        try:
            float(s)
            return True
        except ValueError:
            return False

    def get(self, prepid=''):
        if self.cache:
            result = self.__get_from_cache(prepid)
            if result:
                return result
        self.logger.log('Looking for document "%s" in "%s"...' % (prepid, self.db_name))
        try:
            doc = self.db.document(id=prepid)
            if self.cache:
                self.__save_to_cache(prepid, doc)
            return doc
        except Exception as ex:
            self.logger.error('Document "%s" was not found. Reason: %s' % (prepid, ex))
            return {}

    def __save_to_cache(self, key, value):
        from tools.locker import locker
        with locker.lock(key):
            self.cache_dictionary[key] = value

    def __get_from_cache(self, key):
        from tools.locker import locker
        with locker.lock(key):
            return self.cache_dictionary[key]

    def __document_exists(self, doc):
        if not doc:
            self.logger.error('Trying to locate empty string.', level='warning')
            return False
        id = ''
        if 'prepid' not in doc:
            if '_id' not in doc:
                self.logger.error('Document does not have an "_id" parameter.', level='critical')
                return False
            id = doc['_id']
        elif '_id' not in doc:
            if 'prepid' not in doc:
                self.logger.error('Document does not have an "_id" parameter.', level='critical')
                return False
            id = doc['prepid']
        id = doc['_id']
        return self.__id_exists(prepid=id)

    def document_exists(self, prepid=''):
        self.logger.log('Checking existence of document "%s" in "%s"...' % (prepid, self.db_name))
        return self.__id_exists(prepid)

    def __id_exists(self, prepid=''):
        try:
            if self.cache and self.__get_from_cache(prepid) or self.db.documentExists(id=prepid):
                return True
            self.logger.error('Document "%s" does not exist.' % (prepid))
            return False
        except CouchError as ex:
            self.logger.error('Document "%s" was not found (CouchError: %s); trying a second time after a short timeout' % (prepid, ex))
            time.sleep(0.5)
            return self.__id_exists(prepid)
        except Exception as ex:
            self.logger.error('Document "%s" was not found. Reason: %s' % (prepid, ex))
            return False

    def delete(self, prepid=''):
        if not prepid:
            return False
        if not self.__id_exists(prepid):
            return False
        self.logger.log('Trying to delete document "%s"...' % (prepid))
        try:
            self.db.delete_doc(id=prepid)
            if self.cache:
                self.__save_to_cache(prepid, None)
            return True
        except Exception as ex:
            self.logger.error('Could not delete document: %s . Reason: %s ' % (prepid, ex))
            return False

    def update(self, doc={}):
        if '_id' in doc:
            self.logger.log('Updating document "%s" in "%s"' % (doc['_id'], self.db_name))
        if self.__document_exists(doc):
            if self.cache:
                ## JR: the revision in the cache is not the one in the DB at this point;
                ## it will be retaken at the next get
                self.__save_to_cache(doc['_id'], None)
            return self.save(doc)
        self.logger.error('Failed to update document: %s' % (json.dumps(doc)))
        return False

    def update_all(self, docs=[]):
        if not docs:
            return False
        for doc in docs:
            if self.__document_exists(doc):
                self.db.queue(doc)
        try:
            self.db.commit()
            return True
        except Exception as ex:
            self.logger.error('Could not commit changes to database. Reason: %s' % (ex))
            return False

    def get_all(self, page_num=-1):
        try:
            limit, skip = self.__pagify(page_num)
            if limit >= 0 and skip >= 0:
                result = self.db.loadView(self.db_name, "all",
                                          options={'limit': limit, 'skip': skip, 'include_docs': True})['rows']
                res = map(lambda r: r['doc'], result)
                return res
            result = self.db.loadView(self.db_name, "all", options={'include_docs': True})['rows']
            res = map(lambda r: r['doc'], result)
            return res
        except Exception as ex:
            self.logger.error('Could not access view. Reason: %s' % (ex))
            return []

    def query(self, query='', page_num=0):
        if not query:
            result = self.get_all(page_num)
            #res = map(lambda r: r['doc'], result)
            return result
        try:
            result = self.__query(query, page=page_num)
            #res = map(lambda r: r['doc'], result)
            return result
        except Exception as ex:
            self.logger.error('Could not load view for query: <%s> . Reason: %s' % (query, ex))
            return []

    def unique_res(self, query_result):
        docids = map(lambda doc: doc['_id'], query_result)
        docids_s = list(set(docids))
        if len(docids) != len(docids_s):
            docids_s = []
            return_dict = copy.deepcopy(query_result)
            for doc in query_result:
                if not doc['_id'] in docids_s:
                    docids_s.append(doc['_id'])
                else:
                    return_dict.remove(doc)
            return return_dict
        return query_result

    def queries(self, query_list):
        ## page_num does not matter here
        if not len(query_list):
            return self.get_all(page_num=-1)
        try:
            results_list = []
            ## run each query separately and keep only the docs present in every result
            for (i, query_item) in enumerate(query_list):
                res = self.query(query_item, page_num=-1)
                query_result = self.unique_res(res)
                if i != 0:
                    ## keep only the ones already in the intersection
                    id_list = map(lambda doc: doc['_id'], results_list)
                    results_list = filter(lambda doc: doc['_id'] in id_list, query_result)
                else:
                    results_list = query_result
            return results_list
        except Exception as ex:
            self.logger.error('Could not load view for queries: <%s> . Reason: %s' % ('<br>'.join(query_list), ex))
            return []

    def __extract_operators(self, query=''):
        if not query:
            self.logger.error('Empty query', level='warning')
            return ()
        clean = []
        tokens = []
        for op in self.allowed_operators:
            if op in query:
                tokens = query.rsplit(op)
                tokens.insert(1, op)
            else:
                continue
            for tok in tokens:
                if len(tok) < 1:
                    continue
                clean.append(tok.strip().strip('"'))
            if len(clean) != 3:
                raise self.MapReduceSyntaxError(query)
            #if clean[0] not in self.request and clean[1] not in self.campaign:
            #    raise self.IllegalParameterError(clean[0])
            return clean
        raise self.MapReduceSyntaxError(query)

    def __pagify(self, page_num=0, limit=20):
        if page_num < 0:
            return -1, 0
        skip = limit * page_num
        return limit, skip

    def __execute_query(self, tokenized_query='', page=-1, limit=20):
        tokens = []
        try:
            tokens = self.__extract_operators(tokenized_query)
        except Exception as ex:
            self.logger.error('Could not parse query. Reason: %s' % (ex))
            return []
        if tokens:
            view_name, view_opts = self.__build_query(tokens)
            if not view_name or not view_opts:
                return []
            if page > -1:
                view_opts['limit'] = limit
                view_opts['skip'] = page * limit
            view_opts['include_docs'] = True
            result = self.db.loadView(self.db_name, view_name, options=view_opts)['rows']
            res = map(lambda r: r['doc'], result)
            return res
        else:
            return []

    def raw_query(self, view_name, options={}):
        self.logger.error('Executing raw query to the database. Accessed view: %s' % (view_name), level='warning')
        return self.db.loadView(self.db_name, view_name, options)['rows']

    def __get_op(self, oper):
        if oper == '>':
            return lambda x, y: x > y
        elif oper == '>=':
            return lambda x, y: x >= y
        elif oper == '<':
            return lambda x, y: x < y
        elif oper == '<=':
            return lambda x, y: x <= y
        elif oper == '==':
            return lambda x, y: x == y
        else:
            return None

    def __filter(self, tokenized_query=[], view_results=[]):
        if len(tokenized_query) != 3:
            return view_results
        prn = tokenized_query[0]
        op = tokenized_query[1]
        if self.__is_number(tokenized_query[2]):
            val = float(tokenized_query[2])
        else:
            val = tokenized_query[2]
        f = self.__get_op(op)
        return filter(lambda x: f(x[prn], val), view_results)

    def __query(self, query='', page=0, limit=20):
        t_par = []
        results = []
        # what is that ',' split for?
        #if ',' in query:
        #    t_par = query.rsplit(',')
        if not t_par:
            t_par = [query]
        if len(t_par) == 1:
            return self.__execute_query(t_par[0], page, limit)  #[page*limit:page*limit+limit]
        elif len(t_par) == 0:
            return []
        #temp = self.__execute_query(t_par[0])  #[page*limit:page*limit+limit]
        res = self.__execute_query(t_par[0])
        #res = map(lambda x: x['value'], temp)
        if len(res) == 0:
            return []
        for i in range(1, len(t_par)):
            tq = self.__extract_operators(t_par[i])
            res = self.__filter(tq, res)
        #return map(lambda x: {'value': x}, res[page*limit:page*limit+20])
        return res[page * limit:page * limit + 20]

    def __build_query(self, tokens=[]):
        if not tokens:
            return None, None
        if len(tokens) != 3:
            raise self.MapReduceSyntaxError(tokens)
        param = tokens[0]
        op = tokens[1]
        kval = tokens[2]
        try:
            view_opts = self.__build_options(op, kval)
        except Exception as ex:
            self.logger.error('Value types are not compatible with operator %s value %s Error: %s' % (op, kval, str(ex)))
            return None, None
        return param, view_opts

    def __build_options(self, op, val):
        def is_number(s):
            try:
                float(s)
                return True
            except ValueError:
                return False

        # options dictionary
        opts = {}

        # default the composite key search
        #if '[' in val and ']' in val:
        if val.startswith('[') and val.endswith(']'):
            if op == '==':
                try:
                    e = ast.literal_eval(val)
                    opts['key'] = e
                except:
                    opts['key'] = val
            return opts

        # handle alphanumeric key ranges
        num_flag = False
        if is_number(val):
            num_flag = True
            kval = float(val)
        else:
            kval = val.decode('ascii')
        if '>' in op:
            if '=' in op:
                opts['startkey'] = kval
            else:
                if num_flag:
                    opts['startkey'] = kval + 1
                else:
                    opts['startkey'] = kval
            if num_flag:
                opts['endkey'] = 99999999  # assume it is numeric
            else:
                opts['endkey'] = kval + u'\u9999'
        elif '<' in op:
            if '=' in op:
                opts['endkey'] = kval
            else:
                if num_flag:
                    opts['endkey'] = kval - 1
                else:
                    opts['endkey'] = kval
            if num_flag:
                opts['startkey'] = -99999999
            else:
                opts['startkey'] = ''
        elif '==' == op:
            opts['key'] = kval
        elif '~=' == op:
            if kval[-1] == '*':
                opts['startkey'] = kval[:len(kval) - 1]
                opts['endkey'] = kval[:len(kval) - 1] + u'\u9999'  #'99999999'  #'\u9999'
        return opts

    def save_all(self, docs=[]):
        if not docs:
            return False
        for doc in docs:
            self.db.queue(doc)
        try:
            self.db.commit()
            return True
        except Exception as ex:
            self.logger.error('Could not commit changes to database. Reason: %s' % (ex))
            return False

    def save(self, doc={}):
        if not doc:
            self.logger.error('Tried to save empty document.', level='warning')
            return False
        # TODO: Check if an object exists in the database and fail.
        #if '_id' in doc:
        #    self.logger.log('Using user-defined id: %s' % (doc['_id']))
        #if self.__document_exists(doc):
        #    self.logger.error('Failed to update document: %s' % (json.dumps(doc)))
        #    return False
        try:
            #self.logger.error('Document is %s %s' % (doc['_id'], doc))
            #self.logger.error(self.db.commitOne(doc))
            ## this is a change I just made (23/05/2013 13:31) because the return
            ## value of update should be True/False
            saved = self.db.commitOne(doc)
            if 'error' in saved[0]:
                self.logger.error('Commit One says: %s' % (saved))
                return False
            else:
                return True
        except Exception as ex:
            self.logger.error('Could not commit changes to database. Reason: %s' % (ex))
            return False

    def count(self):
        try:
            return len(self.db.allDocs())
        except Exception as ex:
            self.logger.error('Could not count documents in database. Reason: %s' % (ex))
            return -1
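# A minimal usage sketch for the `database` wrapper above, assuming a running CouchDB
# instance reachable through locator().dbLocation(), an existing 'campaigns' database,
# and the corresponding 'all'/'status' views in its design document. The database name
# and prepid are illustrative only.
db = database('campaigns')            # caching is forced on for 'campaigns'

doc = db.get('Summer12_DR53X')        # hypothetical prepid; returns {} if not found
if db.document_exists('Summer12_DR53X'):
    doc['notes'] = 'updated via the database wrapper'
    db.update(doc)

# Simple query strings are parsed with the allowed operators (<=, <, >=, >, ==, ~=)
running = db.query('status==running', page_num=0)
print(len(list(running)))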
def swapLocations(options):
    # Initialize stuff
    phedexAPI = PhEDEx({'cachepath': options.cachepath})
    acdcCouch = Database('wmagent_acdc', options.acdcUrl)

    # Get the IDs of the ACDC documents for the task/request/group/user
    array = [options.group, options.user, options.request, options.task]
    result = acdcCouch.loadView('ACDC', 'owner_coll_fileset_docs', {'reduce': False}, [array])
    documentsIDs = [x['id'] for x in result['rows']]

    # Load the map file saying how we want to change the locations
    mapFile = open(options.map, 'r')
    locationMap = json.load(mapFile)
    mapFile.close()

    # Go through the documents
    for docID in documentsIDs:
        doc = acdcCouch.document(docID)

        # Are we going to change this doc? Better back it up
        if options.change:
            backupFile = open(os.path.join(options.backup, "%s.bkp" % doc["_id"]), 'w')
            json.dump(doc, backupFile)
            backupFile.close()

        # Go through the files
        files = doc["files"]
        for inputFile in files:
            # Use the PhEDEx API to get the site based on the SE,
            # then map that to the desired target
            se = files[inputFile]["locations"][0]
            siteLocation = phedexAPI.getBestNodeName(se)
            targetLocation = locationMap.get(siteLocation, siteLocation)

            if siteLocation == targetLocation:
                # Nothing to do with this one, move on
                continue

            if not options.change:
                # No changes, then print the commands to move the files.
                # Get the PFN for both the current location and the target location
                pfnDict = phedexAPI.getPFN(siteLocation, inputFile)
                inputPfn = pfnDict[(siteLocation, inputFile)]
                pfnDict = phedexAPI.getPFN(targetLocation, inputFile)
                targetPfn = pfnDict[(targetLocation, inputFile)]

                # Print it to stdout
                print("lcg-cp -D srmv2 -b %s %s" % (inputPfn, targetPfn))
            else:
                # This is changes time, let's move the stuff
                targetSE = phedexAPI.getNodeSE(targetLocation)
                files[inputFile]["locations"][0] = targetSE
                print("Changing location of %s from %s to %s" % (inputFile, se, targetSE))

        # If specified, commit the changes
        if options.change:
            acdcCouch.commitOne(doc)

    return 0
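# A hedged sketch of a command-line driver for swapLocations above, using stdlib
# argparse. The flag names simply mirror the attributes the function reads
# (cachepath, acdcUrl, group, user, request, task, map, backup, change); the default
# values and the ACDC couch URL are illustrative assumptions, not the real tool.
import argparse
import sys


def main():
    parser = argparse.ArgumentParser(description="Swap file locations in ACDC documents")
    parser.add_argument("--cachepath", default="/tmp/phedex_cache")
    parser.add_argument("--acdcUrl", default="http://localhost:5984")
    parser.add_argument("--group", required=True)
    parser.add_argument("--user", required=True)
    parser.add_argument("--request", required=True)
    parser.add_argument("--task", required=True)
    parser.add_argument("--map", required=True, help="JSON file mapping source site -> target site")
    parser.add_argument("--backup", default=".", help="Directory for document backups")
    parser.add_argument("--change", action="store_true",
                        help="Actually modify the documents instead of printing copy commands")
    options = parser.parse_args()
    return swapLocations(options)


if __name__ == "__main__":
    sys.exit(main())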
class DiscreteSummaryHistogramTest(unittest.TestCase):

    def setUp(self):
        """
        _setUp_

        Setup a couch database for testing of produced JSON
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setupCouch("histogram_dump_t")
        self.histogramDB = Database(dbname="histogram_dump_t")

    def tearDown(self):
        """
        _tearDown_

        Clean the couch
        """
        self.testInit.tearDownCouch()

    def testA_BasicTest(self):
        """
        _testA_BasicTest_

        Build a histogram from a set of discrete data. Check that the
        statistic properties in the histogram are accurate, and that this
        can become a document and be uploaded to couch
        """
        # Try an empty one
        histogram = DiscreteSummaryHistogram('SomeTitle', 'Categories')
        histogramJSON = histogram.toJSON()

        self.assertEqual(histogramJSON["title"], "SomeTitle")
        self.assertEqual(histogramJSON["xLabel"], "Categories")
        self.assertFalse(histogramJSON["continuous"])
        self.assertEqual(len(histogramJSON["data"]), 0)
        self.assertEqual(histogramJSON["average"], {})
        self.assertEqual(histogramJSON["stdDev"], {})

        histogram = DiscreteSummaryHistogram('SomeTitle', 'Categories')

        for _ in range(5):
            histogram.addPoint("CategoryA", "FeatureA")
            histogram.addPoint("CategoryB", "FeatureB")

        for _ in range(17):
            histogram.addPoint("CategoryA", "FeatureB")
            histogram.addPoint("CategoryC", "FeatureB")

        for _ in range(3):
            histogram.addPoint("CategoryC", "FeatureA")

        jsonHistogram = histogram.toJSON()

        # Average/stdDev per feature:
        # FeatureA: avg = 2.7 stdev = 2.05
        # FeatureB: avg = 13 stdev = 5.66
        self.assertAlmostEqual(jsonHistogram["average"]["FeatureA"], 2.7, places=1)
        self.assertAlmostEqual(jsonHistogram["average"]["FeatureB"], 13, places=1)
        self.assertAlmostEqual(jsonHistogram["stdDev"]["FeatureA"], 2.05, places=1)
        self.assertAlmostEqual(jsonHistogram["stdDev"]["FeatureB"], 5.66, places=1)
        self.assertEqual(jsonHistogram["data"]["CategoryA"]["FeatureA"], 5)
        self.assertEqual(jsonHistogram["data"]["CategoryA"]["FeatureB"], 17)
        self.assertEqual(jsonHistogram["data"]["CategoryB"]["FeatureA"], 0)
        self.assertEqual(jsonHistogram["data"]["CategoryB"]["FeatureB"], 5)
        self.assertEqual(jsonHistogram["data"]["CategoryC"]["FeatureA"], 3)
        self.assertEqual(jsonHistogram["data"]["CategoryC"]["FeatureB"], 17)

        # Test couch
        # Try to commit it to couch
        jsonHistogram["_id"] = jsonHistogram["title"]
        self.histogramDB.commitOne(jsonHistogram)

        storedJSON = self.histogramDB.document("SomeTitle")
        self.assertEqual(len(storedJSON["data"]), 3)

        return
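# A short non-test sketch of the same pattern the test above exercises: build a
# DiscreteSummaryHistogram, serialise it with toJSON() and push the result to CouchDB
# as a document. The histogram title, the site/state categories, the database name
# and the couch URL are illustrative assumptions.
jobStates = DiscreteSummaryHistogram('JobStates', 'Site')
for site, state in [("T1_US_FNAL", "success"), ("T1_US_FNAL", "failed"),
                    ("T2_CH_CERN", "success")]:
    jobStates.addPoint(site, state)

summaryDoc = jobStates.toJSON()
summaryDoc["_id"] = summaryDoc["title"]

histogramDB = Database(dbname="histogram_dump", url="http://localhost:5984")
histogramDB.commitOne(summaryDoc)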