def worker(query, expire):
    """
    Worker function which invokes DAS core to update cache for input query
    """
    dascore = DASCore()
    status = dascore.call(query)
    return status
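# For reference, a minimal sketch of how worker() might be dispatched in
# parallel. This is an illustration, not part of DAS: the update_cache()
# helper, pool size, and queries list are hypothetical assumptions; only
# worker(query, expire) comes from the code above.
from multiprocessing import Pool

def update_cache(queries, expire=600, nprocs=4):
    """Run worker() over a list of DAS queries using a small process pool."""
    with Pool(processes=nprocs) as pool:
        # starmap feeds (query, expire) tuples to worker and collects statuses
        return pool.starmap(worker, [(query, expire) for query in queries])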
class testDASCore(unittest.TestCase):
    """
    A test class for the DAS core module
    """
    def setUp(self):
        """set up DAS core module"""
        debug = 0
        self.das = DASCore(debug=debug, multitask=False)
        config = deepcopy(das_readconfig())
        dburi = config['mongodb']['dburi']
        connection = MongoClient(dburi)
        connection.drop_database('das')

    def testAggregators(self):
        """test DASCore aggregators via zip service"""
        # test DAS workflow
        query = "file dataset=/ZMM/Summer11-DESIGN42_V11_428_SLHC1-v1/GEN-SIM | grep file.size | sum(file.size)"
        dquery = DASQuery(query)
        result = self.das.call(dquery)
        result = self.das.get_from_cache(dquery)
        result = [r for r in result][0]
        if 'das' in result:
            del result['das']  # strip off DAS info
        expect = {"function": "sum", "result": {"value": 5658838455},
                  "key": "file.size", "_id": 0}
        # the result may have value == 'N/A' when the test is run w/o
        # certificates (travis); in this case we just skip it
        if result['result']['value'] != 'N/A':
            self.assertEqual(expect, result)
class testDASCore(unittest.TestCase):
    """
    A test class for the DAS core module
    """
    def setUp(self):
        """set up DAS core module"""
        debug = 0
        self.das = DASCore(debug=debug, multitask=False)
        config = deepcopy(das_readconfig())
        dburi = config['mongodb']['dburi']
        connection = MongoClient(dburi)
        connection.drop_database('das')

    def testAggregators(self):
        """test DASCore aggregators via zip service"""
        # test DAS workflow
        query = "zip=14850 | grep zip.code | count(zip.code)"
        dquery = DASQuery(query)
        result = self.das.call(dquery)
        result = self.das.get_from_cache(dquery)
        result = [r for r in result][0]
        if 'das' in result:
            del result['das']  # strip off DAS info
        expect = {"function": "count", "result": {"value": 1},
                  "key": "zip.code", "_id": 0}
        self.assertEqual(expect, result)

    def testIPService(self):
        """test DASCore with IP service"""
        ipaddr = socket.gethostbyname('cmsweb.cern.ch')
        # test DAS workflow
        query = "ip=%s" % ipaddr
        dquery = DASQuery(query)
        result = self.das.call(dquery)
        expect = "ok"
        self.assertEqual(expect, result)
        # test results
        query = "ip=%s | grep ip.address" % ipaddr
        dquery = DASQuery(query)
        result = self.das.get_from_cache(dquery)
        result = [r for r in result][0]
        result = DotDict(result).get('ip.address')
        expect = ipaddr
        self.assertEqual(expect, result)
class testDASCore(unittest.TestCase):
    """
    A test class for the DAS core module
    """
    def setUp(self):
        """set up DAS core module"""
        debug = 0
        self.das = DASCore(debug=debug)
        config = deepcopy(das_readconfig())
        dburi = config['mongodb']['dburi']
        connection = MongoClient(dburi)
        connection.drop_database('das')

    def testAggregators(self):
        """test DASCore aggregators via zip service"""
        # test DAS workflow
        query = "zip=14850 | grep zip.Placemark.address | count(zip.Placemark.address)"
        dquery = DASQuery(query)
        result = self.das.call(dquery)
        result = self.das.get_from_cache(dquery)
        result = [r for r in result][0]
        expect = {"function": "count", "result": {"value": 1},
                  "key": "zip.Placemark.address", "_id": 0}
        self.assertEqual(expect, result)

    def testIPService(self):
        """test DASCore with IP service"""
        # test DAS workflow
        query = "ip=137.138.141.145"
        dquery = DASQuery(query)
        result = self.das.call(dquery)
        expect = "ok"
        self.assertEqual(expect, result)
        # test results
        query = "ip=137.138.141.145 | grep ip.City"
        dquery = DASQuery(query)
        result = self.das.get_from_cache(dquery)
        result = [r for r in result][0]
        result = DotDict(result).get('ip.City')
        expect = 'Geneva'
        self.assertEqual(expect, result)
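# The testDASCore variants above can be run directly, assuming each lives in
# its own module; the entry point is standard unittest boilerplate.
if __name__ == '__main__':
    unittest.main()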
class testCMSFakeDataServices(unittest.TestCase):
    """
    A test class for the DAS core module
    """
    def setUp(self):
        """set up DAS core module"""
        debug = 0
        # read DAS config and make fake Mapping DB entry
        collname = 'test_collection'
        self.dasmerge = 'test_merge'
        self.dascache = 'test_cache'
        self.dasmr = 'test_mapreduce'
        self.collname = collname
        # config = deepcopy(das_readconfig())
        config = das_readconfig()
        dburi = config['mongodb']['dburi']
        self.dburi = dburi
        logger = PrintManager('TestCMSFakeDataServices', verbose=debug)
        self.base = 'http://localhost:8080'  # URL of DASTestDataService
        self.expire = 100
        config['logger'] = logger
        config['loglevel'] = debug
        config['verbose'] = debug
        config['mappingdb'] = dict(dburi=dburi, dbname='mapping', collname=collname)
        config['analyticsdb'] = dict(dbname='analytics', collname=collname, history=100)
        config['dasdb'] = {'dbname': 'das',
                           'cachecollection': self.dascache,
                           'mrcollection': self.dasmr,
                           'mergecollection': self.dasmerge}
        config['keylearningdb'] = {'collname': collname, 'dbname': 'keylearning'}
        config['parserdb'] = {'collname': collname, 'dbname': 'parser',
                              'enable': True, 'sizecap': 10000}
        config['services'] = ['dbs', 'phedex', 'sitedb', 'google_maps', 'ip']

        # mongo parser
        self.mongoparser = ql_manager(config)
        config['mongoparser'] = self.mongoparser

        # setup DAS mapper
        self.mgr = DASMapping(config)

        # create fresh DB
        self.clear_collections()
        self.mgr.delete_db_collection()
        self.mgr.create_db()

        # Add fake mapping records
        self.add_service('ip', 'ip.yml')
        self.add_service('google_maps', 'google_maps.yml')
        self.add_service('dbs', 'dbs.yml')
        self.add_service('phedex', 'phedex.yml')
        self.add_service('sitedb', 'sitedb.yml')

        # create DAS handler
        self.das = DASCore(config)

        # start TestDataService
        self.server = Root(config)
        self.server.start()

    def add_service(self, system, ymlfile):
        """
        Add fake data service mapping records. We provide a system name which
        matches the corresponding name in DASTestDataService, together with
        the YML map file associated with this system.
        """
        fname = os.path.join(DASPATH, 'services/maps/%s' % ymlfile)
        url = self.base + '/%s' % system
        for record in read_service_map(fname):
            record['url'] = url
            record['system'] = system
            self.mgr.add(record)
        for record in read_service_map(fname, 'notations'):
            record['system'] = system
            self.mgr.add(record)

    def clear_collections(self):
        """clean-up test collections"""
        conn = MongoClient(host=self.dburi)
        for dbname in ['mapping', 'analytics', 'das', 'parser', 'keylearning']:
            db = conn[dbname]
            if dbname != 'das':
                db.drop_collection(self.collname)
            else:
                db.drop_collection(self.dascache)
                db.drop_collection(self.dasmerge)
                db.drop_collection(self.dasmr)

    def tearDown(self):
        """Invoke after each test"""
        self.server.stop()
        # self.mgr.delete_db_collection()
        # self.clear_collections()

    def testDBSService(self):
        """test DASCore with test DBS service"""
        query = "primary_dataset=abc"  # invoke query to fill DAS cache
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.call(dquery)
        expect = "ok"
        self.assertEqual(expect, result)

        query = "primary_dataset=abc"  # invoke query to get results from DAS cache
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.get_from_cache(dquery, collection=self.dasmerge)
        result = [r for r in result]
        result = DotDict(result[0]).get('primary_dataset.name')
        expect = 'abc'
        self.assertEqual(expect, result)

    def testPhedexAndSiteDBServices(self):
        """test DASCore with test PhEDEx and SiteDB services"""
        query = "site=T3_US_Cornell"  # invoke query to fill DAS cache
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.call(dquery)
        expect = "ok"
        self.assertEqual(expect, result)

        query = "site=T3_US_Cornell | grep site.name"  # invoke query to get results from DAS cache
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.get_from_cache(dquery, collection=self.dasmerge)
        result = [r for r in result]
        expect = 'T3_US_Cornell'
        self.assertEqual(expect, DotDict(result[0]).get('site.name'))

        expect = ['_id', 'das_id', 'site', 'cache_id', 'das', 'qhash']
        expect.sort()
        rkeys = sorted(result[0].keys())
        self.assertEqual(expect, rkeys)

    def testAggregators(self):
        """test DASCore aggregators via zip service"""
        query = "zip=1000"
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.call(dquery)
        expect = "ok"
        self.assertEqual(expect, result)

        query = "zip=1000 | count(zip.place.city)"
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.get_from_cache(dquery, collection=self.dasmerge)
        result = [r for r in result]
        expect = {"function": "count", "result": {"value": 2},
                  "key": "zip.place.city", "_id": 0}
        self.assertEqual(expect, result[0])

    def testIPService(self):
        """test DASCore with IP service"""
        query = "ip=137.138.141.145"
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.call(dquery)
        expect = "ok"
        self.assertEqual(expect, result)

        query = "ip=137.138.141.145 | grep ip.address"
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.get_from_cache(dquery, collection=self.dasmerge)
        result = [r for r in result]
        result = DotDict(result[0]).get('ip.address')
        expect = '137.138.141.145'
        self.assertEqual(expect, result)

    def testRecords(self):
        """test records DAS keyword with all services"""
        query = "ip=137.138.141.145"
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.call(dquery)
        expect = "ok"
        self.assertEqual(expect, result)

        query = "site=T3_US_Cornell"
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.call(dquery)
        expect = "ok"
        self.assertEqual(expect, result)

        query = "records | grep ip.address"
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.get_from_cache(dquery, collection=self.dasmerge)
        result = [r for r in result]
        result = DotDict(result[0]).get('ip.address')
        expect = '137.138.141.145'
        self.assertEqual(expect, result)

        query = "records | grep site.name"
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.get_from_cache(dquery, collection=self.dasmerge)
        result = [r for r in result]
        expect = 'T3_US_Cornell'
        self.assertEqual(expect, DotDict(result[0]).get('site.name'))

        query = "records"
        dquery = DASQuery(query, mongoparser=self.mongoparser)
        result = self.das.get_from_cache(dquery, collection=self.dasmerge)
        res = []
        for row in result:
            if 'ip' in row:
                res.append(DotDict(row).get('ip.address'))
            if 'site' in row:
                for item in row['site']:
                    if 'name' in item and item['name'] not in res:
                        res.append(item['name'])
        res.sort()
        expect = ['137.138.141.145', 'T3_US_Cornell']
        self.assertEqual(expect, res)
class Robot(object):
    """
    DAS Robot (daemon) class to fetch data from a provided URL/API
    and store them into the DAS cache.
    """
    def __init__(self, config=None, query=None, sleep=600):
        self.dascore = DASCore(config, nores=True)
        logdir = getarg(config, 'logdir', '/tmp')
        self.pidfile = os.path.join(logdir, 'robot-%s.pid' % genkey(query))
        if hasattr(os, "devnull"):
            devnull = os.devnull
        else:
            devnull = "/dev/null"
        self.stdin = devnull  # we do not read from standard input
        self.stdout = getarg(config, 'stdout', devnull)
        self.stderr = getarg(config, 'stderr', devnull)
        self.query = query
        self.sleep = sleep

    def daemonize(self):
        """
        do the UNIX double-fork magic, see Stevens' "Advanced Programming
        in the UNIX Environment" for details (ISBN 0201563177)
        http://www.erlenstar.demon.co.uk/unix/faq_2.html#SEC16
        """
        try:
            pid = os.fork()
            if pid > 0:
                # exit first parent
                sys.exit(0)
        except OSError as err:
            sys.stderr.write("fork #1 failed: %d (%s)\n" \
                % (err.errno, err.strerror))
            sys.exit(1)

        # decouple from parent environment
        os.chdir("/")
        os.umask(0)
        os.setsid()

        # do second fork
        try:
            pid = os.fork()
            if pid > 0:
                # exit from second parent
                sys.exit(0)
        except OSError as err:
            sys.stderr.write("fork #2 failed: %d (%s)\n" \
                % (err.errno, err.strerror))
            sys.exit(1)

        # redirect standard file descriptors; note that unbuffered text I/O
        # is not supported in Python 3, so stderr uses default buffering
        sys.stdout.flush()
        sys.stderr.flush()
        stdi = open(self.stdin, 'r')
        stdo = open(self.stdout, 'a+')
        stde = open(self.stderr, 'a+')
        os.dup2(stdi.fileno(), sys.stdin.fileno())
        os.dup2(stdo.fileno(), sys.stdout.fileno())
        os.dup2(stde.fileno(), sys.stderr.fileno())

        # write pidfile
        atexit.register(self.delpid)
        pid = str(os.getpid())
        with open(self.pidfile, 'w+') as pidf:
            pidf.write("%s\n" % pid)

    def delpid(self):
        """Delete PID file"""
        os.remove(self.pidfile)

    def start(self):
        """
        Start the daemon
        """
        # Check for a pidfile to see if the daemon already runs
        try:
            with open(self.pidfile, 'r') as pidf:
                pid = int(pidf.read().strip())
        except IOError:
            pid = None

        if pid:
            message = "pidfile %s already exists. Daemon already running?\n"
            sys.stderr.write(message % self.pidfile)
            sys.exit(1)

        # Start the daemon
        self.daemonize()
        self.run()

    def stop(self):
        """
        Stop the daemon
        """
        # Get the pid from the pidfile
        try:
            with open(self.pidfile, 'r') as pidf:
                pid = int(pidf.read().strip())
        except IOError:
            pid = None

        if not pid:
            message = "pidfile %s does not exist. Daemon not running?\n"
            sys.stderr.write(message % self.pidfile)
            return  # not an error in a restart

        # Try killing the daemon process
        try:
            while True:
                os.kill(pid, SIGTERM)
                time.sleep(0.1)
        except OSError as err:
            if str(err).find("No such process") > 0:
                if os.path.exists(self.pidfile):
                    os.remove(self.pidfile)
            else:
                print_exc(err)
                sys.exit(1)

    def restart(self):
        """
        Restart the daemon
        """
        self.stop()
        self.start()

    def status(self):
        """
        Return status information about Robot instance.
        """
        # Get the pid from the pidfile
        try:
            with open(self.pidfile, 'r') as pidf:
                pid = int(pidf.read().strip())
        except IOError:
            pid = None

        if not pid:
            message = "pidfile %s does not exist. Daemon not running?\n"
            sys.stderr.write(message % self.pidfile)
            return  # not an error in a restart

        print("DAS populator information")
        print("PID    :", pid)
        print("pidfile:", self.pidfile)
        print("stdin  :", self.stdin)
        print("stdout :", self.stdout)
        print("stderr :", self.stderr)
        print("sleep  :", self.sleep)
        print("query  :", self.query)

    def run(self):
        """
        Method which will be called after the process has been
        daemonized by start() or restart().
        """
        if not self.query:
            print("DAS query is not provided")
            sys.exit(1)
        while True:
            self.dascore.call(self.query)
            time.sleep(self.sleep)