class PLYSubmitter(Submitter): """ Submits queries to the DAS PLY yacc/lexx parser to check validity (no further submission is done). """ def __init__(self, producer, **kwargs): assert HAVE_DAS core = DASCore() parserdir = core.dasconfig['das']['parserdir'] dasservices = core.dasconfig['services'] daskeys = [] for val in core.mapping.daskeys().values(): for item in val: daskeys.append(item) self.dasply = DASPLY(parserdir, daskeys, dasservices) self.dasply.build() Submitter.__init__(self, producer, **kwargs) def submit(self, query): """submit query""" print "RAW: ", query ply = self.dasply.parser.parse(query) print "PLY: ", ply mongo = ply2mongo(ply) print "MONGO: ", mongo return True
def ply_output(query, keys, services, pdir='/tmp', verbose=False): """Print PLY/lexer output""" if verbose: dasply = DASPLY(pdir, keys, services, verbose=verbose) dasply.build() print "input query='%s'" % query dasply.test_lexer(query)
def test_spawn_manager(self):
    """Test that spawn'ing the PLY parse call returns the same result
    as a direct parser.parse invocation."""
    dasservices = ['dbs', 'dbs3']
    daskeys = ['dataset']
    parserdir = os.getcwd()
    query="dataset=/ZMM*/*/*"
    dasply = DASPLY(parserdir, daskeys, dasservices, verbose=0)
    dasply.build()
    # direct parse vs parse executed through spawn must agree
    ply_q1 = dasply.parser.parse(query)
    ply_q2 = spawn(dasply.parser.parse, query)
    self.assertEqual(ply_q1, ply_q2)
def __init__(self, config=None):
    """
    Initialize the QL manager from a DAS configuration dict.

    Falls back to das_readconfig() when no config is given, lazily
    creating the mapping/analytics helpers inside the config dict
    (callers sharing the same config object therefore see them too).
    Raises Exception when MappingDB holds no DAS maps.
    """
    if not config:
        config = das_readconfig()
    # NOTE: config is mutated in place; later reads below rely on
    # these entries being present
    if not config.has_key('dasmapping'):
        config['dasmapping'] = DASMapping(config)
    if not config.has_key('dasanalytics'):
        config['dasanalytics'] = DASAnalytics(config)
    if not config['dasmapping'].check_maps():
        msg = "No DAS maps found in MappingDB"
        raise Exception(msg)
    self.map = config['dasmapping']
    self.analytics = config['dasanalytics']
    self.dasservices = config['services']
    self.daskeysmap = self.map.daskeys()
    self.operators = list(das_operators())
    # seed with special keys, then extend with every mapped DAS key
    self.daskeys = list(das_special_keys())
    self.verbose = config['verbose']
    self.logger = PrintManager('QLManger', self.verbose)
    for val in self.daskeysmap.values():
        for item in val:
            self.daskeys.append(item)
    parserdir = config['das']['parserdir']
    self.dasply = DASPLY(parserdir, self.daskeys, self.dasservices,
                         verbose=self.verbose)
    # optional parser-result cache DB
    self.enabledb = config['parserdb']['enable']
    if self.enabledb:
        self.parserdb = DASParserDB(config)
def ply_parse_query(query, keys, services, pdir='/tmp', verbose=False): """Get ply object for given query.""" dasply = DASPLY(pdir, keys, services, verbose=verbose) dasply.build() # ply_query = dasply.parser.parse(query) # ply_query = spawn(dasply.parser.parse, query) # return ply_query error = None for trial in xrange(1, 3): try: ply_query = dasply.parser.parse(query) return ply_query except Exception as exc: msg = "Fail to parse query=%s, trial=%s, exception=%s" \ % (query, trial, str(exc)) print dastimestamp('DAS WARNING ') + ' ' + msg error = exc time.sleep(trial/10.) raise error
def __init__(self, producer, **kwargs):
    """Construct the submitter and build a DAS PLY parser from DAS config."""
    assert HAVE_DAS
    core = DASCore()
    cfg = core.dasconfig
    # collect every DAS key registered in the mapping DB
    daskeys = [key for keyset in core.mapping.daskeys().values() \
                   for key in keyset]
    self.dasply = DASPLY(cfg['das']['parserdir'], daskeys, cfg['services'])
    self.dasply.build()
    Submitter.__init__(self, producer, **kwargs)
def setUp(self):
    """
    set up DAS core module

    Builds a DAS PLY parser over a fixed list of DAS systems/keys and
    fills self.queries, mapping each DAS query string to the MongoDB
    query dict the parser is expected to produce.
    """
    self.debug = 0
    dassystems = ['dbs', 'sitedb', 'phedex', 'google_maps',
                  'postalcode', 'ip_services']
    daskeys = ['dataset', 'file', 'block', 'run', 'site', 'latitude',
               'longitude', 'city', 'ip', 'date', 'system', 'zip']
    parserdir = '/tmp'
    self.dasply = DASPLY(parserdir, daskeys, dassystems,
                         verbose=self.debug)
    # silence yacc table-generation warnings during test runs
    args = {'errorlog' : ply.yacc.NullLogger()}
    self.dasply.build(**args)
    self.queries = {}
    # special "queries"/"records" keywords
    query = "queries"
    mongo = {'fields': ['queries'], 'spec': {'queries': '*'}}
    self.queries[query] = mongo
    query = "popular queries"
    mongo = {'fields': ['popular', 'queries'], 'spec': {'queries': '*'}}
    self.queries[query] = mongo
    query = "popular queries date last 24h"
    mongo = {'fields': ['popular', 'queries'], 'spec': {'date': '24h'}}
    # date1 = time.time() - 24*60*60
    # date2 = time.time()
    # mongo = {'fields': ['popular', 'queries'],
    #          'spec': {'date': {'$gte': long(date1), '$lte': long(date2)}}}
    self.queries[query] = mongo
    query = "records"
    mongo = {'fields': ['records'], 'spec': {'records': '*'}}
    self.queries[query] = mongo
    query = "records site=T1_CH_CERN"
    mongo = {'fields': ['records'], 'spec': {'site': 'T1_CH_CERN'}}
    self.queries[query] = mongo
    # plain key=value conditions
    query = "site=T1_CH_CERN"
    mongo = {'fields': ['site'], 'spec': {'site': 'T1_CH_CERN'}}
    self.queries[query] = mongo
    query = "site site=T1_CH_CERN"
    mongo = {'fields': ['site'], 'spec': {'site': 'T1_CH_CERN'}}
    self.queries[query] = mongo
    query = "site site=srm-cms.cern.ch"
    mongo = {'fields': ['site'], 'spec': {'site': 'srm-cms.cern.ch'}}
    self.queries[query] = mongo
    query = "site site=cmssrm.fnal.gov"
    mongo = {'fields': ['site'], 'spec': {'site': 'cmssrm.fnal.gov'}}
    self.queries[query] = mongo
    query = "site=T1_CH_CERN site"
    mongo = {'fields': ['site'], 'spec': {'site': 'T1_CH_CERN'}}
    self.queries[query] = mongo
    # queries with filters / aggregators
    query = "dataset=/a/b/c run=123 | grep dataset.size"
    mongo = {'filters': {'grep': ['dataset.size']}, 'fields': None,
             'spec': {'dataset': '/a/b/c', 'run': 123}}
    self.queries[query] = mongo
    query = "site=T1_CH_CERN system=sitedb"
    mongo = {'fields': ['site'], 'spec': {'site': 'T1_CH_CERN'},
             'system': 'sitedb'}
    self.queries[query] = mongo
    query = "zip=10000 | grep zip.Placemark.address | count(zip.Placemark.address)"
    mongo = {'fields': ['zip'], 'spec': {'zip': 10000},
             'filters': {'grep': ['zip.Placemark.address']},
             'aggregators': [('count', 'zip.Placemark.address')] }
    self.queries[query] = mongo
    query = "city=Ithaca"
    mongo = {'fields': ['city'], 'spec': {'city': 'Ithaca'}}
    self.queries[query] = mongo
    query = "zip=14850"
    mongo = {'fields': ['zip'], 'spec': {'zip': 14850}}
    self.queries[query] = mongo
    query = "ip=137.138.141.145 | grep ip.City"
    mongo = {'fields': ['ip'], 'spec': {'ip': '137.138.141.145'},
             'filters': {'grep': ['ip.City']}}
    self.queries[query] = mongo
    query = 'latitude=11.1 longitude=-72'
    mongo = {'fields': None, 'spec':{'latitude':11.1, 'longitude': -72}}
    self.queries[query] = mongo
    query = "site=T1_CH_CERN"
    mongo = {'fields': ['site'], 'spec': {'site': 'T1_CH_CERN'}}
    self.queries[query] = mongo
    query = "run=20853"
    mongo = {'fields': ['run'], 'spec': {'run': 20853}}
    self.queries[query] = mongo
    query = "run between [20853,20859]"
    mongo = {'fields': ['run'],
             'spec': {'run': {'$gte': 20853, '$lte': 20859}}}
    self.queries[query] = mongo
    query = "file block=123 | grep file.size | sum(file.size)"
    mongo = {'fields': ['file'], 'spec': {'block': 123},
             'filters': {'grep': ['file.size']},
             'aggregators': [('sum', 'file.size')]}
    self.queries[query] = mongo
    query = "block=/a/b/RECO#9f5c396b-b6a1"
    mongo = {'fields': ['block'],
             'spec': {'block': '/a/b/RECO#9f5c396b-b6a1'}}
    self.queries[query] = mongo
    query = "block dataset=/W/a_2/RECO"
    mongo = {'fields': ['block'], 'spec': {'dataset': '/W/a_2/RECO'}}
    self.queries[query] = mongo
    # date conditions
    query = "run date last 24h"
    # date1 = time.time() - 24*60*60
    # date2 = time.time()
    # mongo = {'fields': ['run'],
    #          'spec': {'date': {'$gte': long(date1), '$lte': long(date2)}}}
    mongo = {'fields': ['run'], 'spec': {'date': '24h'}}
    self.queries[query] = mongo
    date1 = 20101201
    date2 = 20101202
    query = "run date between [%s, %s]" % (date1, date2)
    mongo = {'fields': ['run'],
             'spec': {'date': {'$lte': long(1291248000),
                               '$gte': long(1291161600)}}}
    self.queries[query] = mongo
    query = "dataset file=/a/b run between [1,2] | grep file.name, file.age | unique | sum(file.size),max(file.size)"
    mongo = {'fields': ['dataset'],
             'spec': {'run': {'$lte': 2, '$gte': 1}, 'file': '/a/b'},
             'filters': {'grep': ['file.name', 'file.age'], 'unique': 1},
             'aggregators': [('sum', 'file.size'), ('max', 'file.size')]}
    self.queries[query] = mongo
    # quoting and case handling of values
    query = "city = camelCase"
    mongo = {'fields': ['city'], 'spec':{'city': 'camelCase'}}
    self.queries[query] = mongo
    query = "city = lowercase"
    mongo = {'fields': ['city'], 'spec':{'city': 'lowercase'}}
    self.queries[query] = mongo
    query = "city = 'two words'"
    mongo = {'fields': ['city'], 'spec':{'city': 'two words'}}
    self.queries[query] = mongo
    query = 'city = "two words"'
    mongo = {'fields': ['city'], 'spec':{'city': 'two words'}}
    self.queries[query] = mongo
    #query=DASKEY
    query = 'city = dataset'
    mongo = {'fields': ['city'], 'spec':{'city': 'dataset'}}
    self.queries[query] = mongo
    #query=DASKEYtext
    query = 'city = datasetPostfix'
    mongo = {'fields': ['city'], 'spec':{'city': 'datasetPostfix'}}
    self.queries[query] = mongo
    #query=OPERATORtext (I don't expect query=OPERATOR to ever work)
    query = 'city = betweenPostfix'
    mongo = {'fields': ['city'], 'spec':{'city': 'betweenPostfix'}}
    self.queries[query] = mongo
    # query w/ filter which contains a key/value pair
    query = 'block=/a/b/c | grep site=T1 '
    mongo = {'fields': ['block'], 'spec': {'block': '/a/b/c'},
             'filters': {'grep': ['site=T1']}}
    self.queries[query] = mongo
    # query w/ filter which contains a filter conditions
    query = 'run dataset=/a/b/c | grep run.run_number>1, run.run_number<10 '
    mongo = {'fields': ['run'], 'spec': {'dataset': '/a/b/c'},
             'filters': {'grep': ['run.run_number>1', 'run.run_number<10']}}
    self.queries[query] = mongo
    # query w/ filter which contains a filter conditions
    query = 'run dataset=/a/b/c | grep run.a>0, run.b>=0, run.c<=0'
    mongo = {'fields': ['run'], 'spec': {'dataset': '/a/b/c'},
             'filters': {'grep': ['run.a>0', 'run.b>=0', 'run.c<=0']}}
    self.queries[query] = mongo
    # query with DASKEY, date=value
    query = 'dataset date=20110124'
    mongo = {'fields': ['dataset'], 'spec': {'date': 1295827200}}
    self.queries[query] = mongo
    # query with DASKEY, date between [value1, value2]
    query = 'dataset date between [20110124,20110126]'
    mongo = {'fields': ['dataset'],
             'spec': {'date': {'$gte': 1295827200, '$lte': 1296000000}}}
    self.queries[query] = mongo
    query = 'file=abcdeasdf'
    mongo = {'fields': ['file'], 'spec': {'file': 'abcdeasdf'}}
    self.queries[query] = mongo
    query = 'file=abcdeasdf dataset=abcdes'
    mongo = {'fields': None,
             'spec': {'file': 'abcdeasdf', 'dataset': 'abcdes'}}
    self.queries[query] = mongo
    query = 'dataset date = 20080201'
    mongo = {'fields': ['dataset'], 'spec': {'date': 1201824000}}
    self.queries[query] = mongo
    query = 'file dataset date = 20080201'
    mongo = {'fields': ['file', 'dataset'], 'spec': {'date': 1201824000}}
    self.queries[query] = mongo
    query = 'dataset dataset=abcdes date = 20080201'
    mongo = {'fields': ['dataset'],
             'spec': {'date': 1201824000, 'dataset': 'abcdes'}}
    self.queries[query] = mongo
    query = 'file dataset dataset=abcdes date = 20080201'
    mongo = {'fields': ['file', 'dataset'],
             'spec': {'date': 1201824000, 'dataset': 'abcdes'}}
    self.queries[query] = mongo
    query = 'file=abcdeasdf file dataset dataset=abcdes date = 20080201'
    mongo = {'fields': ['file', 'dataset'],
             'spec': {'date': 1201824000, 'file': 'abcdeasdf',
                      'dataset': 'abcdes'}}
    self.queries[query] = mongo
    query = 'file dataset=bla | grep file.creation_time<20080201'
    mongo = {'fields': ['file'], 'spec': {'dataset': 'bla'},
             'filters': {'grep': ['file.creation_time<1201824000']}}
    self.queries[query] = mongo
class testDASPLY(unittest.TestCase):
    """
    A test class for the DAS PLY parser/lexer
    """
    def setUp(self):
        """
        set up DAS core module

        Builds a DAS PLY parser over a fixed list of DAS systems/keys and
        fills self.queries, mapping each DAS query string to the MongoDB
        query dict the parser is expected to produce.
        """
        self.debug = 0
        dassystems = ['dbs', 'sitedb', 'phedex', 'google_maps',
                      'postalcode', 'ip_services']
        daskeys = ['dataset', 'file', 'block', 'run', 'site', 'latitude',
                   'longitude', 'city', 'ip', 'date', 'system', 'zip']
        parserdir = '/tmp'
        self.dasply = DASPLY(parserdir, daskeys, dassystems,
                             verbose=self.debug)
        # silence yacc table-generation warnings during test runs
        args = {'errorlog' : ply.yacc.NullLogger()}
        self.dasply.build(**args)
        self.queries = {}
        # special "queries"/"records" keywords
        query = "queries"
        mongo = {'fields': ['queries'], 'spec': {'queries': '*'}}
        self.queries[query] = mongo
        query = "popular queries"
        mongo = {'fields': ['popular', 'queries'], 'spec': {'queries': '*'}}
        self.queries[query] = mongo
        query = "popular queries date last 24h"
        mongo = {'fields': ['popular', 'queries'], 'spec': {'date': '24h'}}
        self.queries[query] = mongo
        query = "records"
        mongo = {'fields': ['records'], 'spec': {'records': '*'}}
        self.queries[query] = mongo
        query = "records site=T1_CH_CERN"
        mongo = {'fields': ['records'], 'spec': {'site': 'T1_CH_CERN'}}
        self.queries[query] = mongo
        # plain key=value conditions
        query = "site=T1_CH_CERN"
        mongo = {'fields': ['site'], 'spec': {'site': 'T1_CH_CERN'}}
        self.queries[query] = mongo
        query = "site site=T1_CH_CERN"
        mongo = {'fields': ['site'], 'spec': {'site': 'T1_CH_CERN'}}
        self.queries[query] = mongo
        query = "site site=srm-cms.cern.ch"
        mongo = {'fields': ['site'], 'spec': {'site': 'srm-cms.cern.ch'}}
        self.queries[query] = mongo
        query = "site site=cmssrm.fnal.gov"
        mongo = {'fields': ['site'], 'spec': {'site': 'cmssrm.fnal.gov'}}
        self.queries[query] = mongo
        query = "site=T1_CH_CERN site"
        mongo = {'fields': ['site'], 'spec': {'site': 'T1_CH_CERN'}}
        self.queries[query] = mongo
        # queries with filters / aggregators
        query = "dataset=/a/b/c run=123 | grep dataset.size"
        mongo = {'filters': {'grep': ['dataset.size']}, 'fields': None,
                 'spec': {'dataset': '/a/b/c', 'run': 123}}
        self.queries[query] = mongo
        query = "site=T1_CH_CERN system=sitedb"
        mongo = {'fields': ['site'], 'spec': {'site': 'T1_CH_CERN'},
                 'system': 'sitedb'}
        self.queries[query] = mongo
        query = "zip=10000 | grep zip.Placemark.address | count(zip.Placemark.address)"
        mongo = {'fields': ['zip'], 'spec': {'zip': 10000},
                 'filters': {'grep': ['zip.Placemark.address']},
                 'aggregators': [('count', 'zip.Placemark.address')] }
        self.queries[query] = mongo
        query = "city=Ithaca"
        mongo = {'fields': ['city'], 'spec': {'city': 'Ithaca'}}
        self.queries[query] = mongo
        query = "zip=14850"
        mongo = {'fields': ['zip'], 'spec': {'zip': 14850}}
        self.queries[query] = mongo
        query = "ip=137.138.141.145 | grep ip.City"
        mongo = {'fields': ['ip'], 'spec': {'ip': '137.138.141.145'},
                 'filters': {'grep': ['ip.City']}}
        self.queries[query] = mongo
        query = 'latitude=11.1 longitude=-72'
        mongo = {'fields': None, 'spec':{'latitude':11.1, 'longitude': -72}}
        self.queries[query] = mongo
        query = "site=T1_CH_CERN"
        mongo = {'fields': ['site'], 'spec': {'site': 'T1_CH_CERN'}}
        self.queries[query] = mongo
        query = "run=20853"
        mongo = {'fields': ['run'], 'spec': {'run': 20853}}
        self.queries[query] = mongo
        query = "run between [20853,20859]"
        mongo = {'fields': ['run'],
                 'spec': {'run': {'$gte': 20853, '$lte': 20859}}}
        self.queries[query] = mongo
        query = "file block=123 | grep file.size | sum(file.size)"
        mongo = {'fields': ['file'], 'spec': {'block': 123},
                 'filters': {'grep': ['file.size']},
                 'aggregators': [('sum', 'file.size')]}
        self.queries[query] = mongo
        query = "block=/a/b/RECO#9f5c396b-b6a1"
        mongo = {'fields': ['block'],
                 'spec': {'block': '/a/b/RECO#9f5c396b-b6a1'}}
        self.queries[query] = mongo
        query = "block dataset=/W/a_2/RECO"
        mongo = {'fields': ['block'], 'spec': {'dataset': '/W/a_2/RECO'}}
        self.queries[query] = mongo
        # date conditions
        query = "run date last 24h"
        mongo = {'fields': ['run'], 'spec': {'date': '24h'}}
        self.queries[query] = mongo
        date1 = 20101201
        date2 = 20101202
        query = "run date between [%s, %s]" % (date1, date2)
        mongo = {'fields': ['run'],
                 'spec': {'date': {'$lte': long(1291248000),
                                   '$gte': long(1291161600)}}}
        self.queries[query] = mongo
        query = "dataset file=/a/b run between [1,2] | grep file.name, file.age | unique | sum(file.size),max(file.size)"
        mongo = {'fields': ['dataset'],
                 'spec': {'run': {'$lte': 2, '$gte': 1}, 'file': '/a/b'},
                 'filters': {'grep': ['file.name', 'file.age'], 'unique': 1},
                 'aggregators': [('sum', 'file.size'), ('max', 'file.size')]}
        self.queries[query] = mongo
        # quoting and case handling of values
        query = "city = camelCase"
        mongo = {'fields': ['city'], 'spec':{'city': 'camelCase'}}
        self.queries[query] = mongo
        query = "city = lowercase"
        mongo = {'fields': ['city'], 'spec':{'city': 'lowercase'}}
        self.queries[query] = mongo
        query = "city = 'two words'"
        mongo = {'fields': ['city'], 'spec':{'city': 'two words'}}
        self.queries[query] = mongo
        query = 'city = "two words"'
        mongo = {'fields': ['city'], 'spec':{'city': 'two words'}}
        self.queries[query] = mongo
        #query=DASKEY
        query = 'city = dataset'
        mongo = {'fields': ['city'], 'spec':{'city': 'dataset'}}
        self.queries[query] = mongo
        #query=DASKEYtext
        query = 'city = datasetPostfix'
        mongo = {'fields': ['city'], 'spec':{'city': 'datasetPostfix'}}
        self.queries[query] = mongo
        #query=OPERATORtext (I don't expect query=OPERATOR to ever work)
        query = 'city = betweenPostfix'
        mongo = {'fields': ['city'], 'spec':{'city': 'betweenPostfix'}}
        self.queries[query] = mongo
        # query w/ filter which contains a key/value pair
        query = 'block=/a/b/c | grep site=T1 '
        mongo = {'fields': ['block'], 'spec': {'block': '/a/b/c'},
                 'filters': {'grep': ['site=T1']}}
        self.queries[query] = mongo
        # query w/ filter which contains a filter conditions
        query = 'run dataset=/a/b/c | grep run.run_number>1, run.run_number<10 '
        mongo = {'fields': ['run'], 'spec': {'dataset': '/a/b/c'},
                 'filters': {'grep': ['run.run_number>1', 'run.run_number<10']}}
        self.queries[query] = mongo
        # query w/ filter which contains a filter conditions
        query = 'run dataset=/a/b/c | grep run.a>0, run.b>=0, run.c<=0'
        mongo = {'fields': ['run'], 'spec': {'dataset': '/a/b/c'},
                 'filters': {'grep': ['run.a>0', 'run.b>=0', 'run.c<=0']}}
        self.queries[query] = mongo
        # query with DASKEY, date=value
        query = 'dataset date=20110124'
        mongo = {'fields': ['dataset'], 'spec': {'date': 1295827200}}
        self.queries[query] = mongo
        # query with DASKEY, date between [value1, value2]
        query = 'dataset date between [20110124,20110126]'
        mongo = {'fields': ['dataset'],
                 'spec': {'date': {'$gte': 1295827200, '$lte': 1296000000}}}
        self.queries[query] = mongo
        query = 'file=abcdeasdf'
        mongo = {'fields': ['file'], 'spec': {'file': 'abcdeasdf'}}
        self.queries[query] = mongo
        query = 'file=abcdeasdf dataset=abcdes'
        mongo = {'fields': None,
                 'spec': {'file': 'abcdeasdf', 'dataset': 'abcdes'}}
        self.queries[query] = mongo
        query = 'dataset date = 20080201'
        mongo = {'fields': ['dataset'], 'spec': {'date': 1201824000}}
        self.queries[query] = mongo
        query = 'file dataset date = 20080201'
        mongo = {'fields': ['file', 'dataset'], 'spec': {'date': 1201824000}}
        self.queries[query] = mongo
        query = 'dataset dataset=abcdes date = 20080201'
        mongo = {'fields': ['dataset'],
                 'spec': {'date': 1201824000, 'dataset': 'abcdes'}}
        self.queries[query] = mongo
        query = 'file dataset dataset=abcdes date = 20080201'
        mongo = {'fields': ['file', 'dataset'],
                 'spec': {'date': 1201824000, 'dataset': 'abcdes'}}
        self.queries[query] = mongo
        query = 'file=abcdeasdf file dataset dataset=abcdes date = 20080201'
        mongo = {'fields': ['file', 'dataset'],
                 'spec': {'date': 1201824000, 'file': 'abcdeasdf',
                          'dataset': 'abcdes'}}
        self.queries[query] = mongo
        query = 'file dataset=bla | grep file.creation_time<20080201'
        mongo = {'fields': ['file'], 'spec': {'dataset': 'bla'},
                 'filters': {'grep': ['file.creation_time<1201824000']}}
        self.queries[query] = mongo

    def test_instance(self):
        """Test appearance of instance in a DAS query"""
        query = 'dataset=/a/b/c instance=global'
        mongo = {'fields': ['dataset'], 'spec': {'dataset':'/a/b/c'},
                 'instance':'global'}
        ply_query = self.dasply.parser.parse(query)
        result = ply2mongo(ply_query)
        self.assertEqual(mongo, result)

    def test_lexer(self):
        """Test DAS PLY lexer"""
        for query in self.queries:
            if self.debug:
                print("\n%s" % query)
            self.dasply.test_lexer(query)

    def test_parser(self):
        """Test DAS PLY parser"""
        for query, expect in self.queries.items():
            try:
                ply_query = self.dasply.parser.parse(query)
            except:
                # re-run with parser debugging on, report the query, re-raise
                self.dasply.parser.parse(query, debug=1)
                print("Input query:", query)
                raise
            if self.debug:
                print()
                print("input query", query)
                print("ply query  ", ply_query)
            result = ply2mongo(ply_query)
            self.assertEqual(expect, result)

    def test_parser_negate(self):
        """Test DAS PLY parser with negative results"""
        mongo = {}
        queries = {}
        query = 'run last 24h'
        queries[query] = mongo
        query = 'run last dataset'
        queries[query] = mongo
        query = 'dateset in 2010'
        queries[query] = mongo
        query = 'date in 24h'
        queries[query] = mongo
        query = 'date last [20101010,20101012]'
        queries[query] = mongo
        query = 'dataset in [/a/b/c,/c/d/e]'
        queries[query] = mongo
        query = 'detaset = /a/b/c'
        queries[query] = mongo
        query = 'detaset = /a/b/c dataset.size'
        queries[query] = mongo
        query = """dataset date in [20110124,20110126]"""
        queries[query] = mongo
        # FIX: the following two entries were added to self.queries (the
        # positive fixtures) instead of the local queries dict, so they
        # were never exercised by the assertRaises loop below.
        query = "run in [20853,20859]"
        queries[query] = mongo
        query = "dataset" # prevent usage of single keys
        queries[query] = mongo
        for query, expect in queries.items():
            self.assertRaises(Exception, self.dasply.parser.parse, query)
class QLManager(object):
    """
    DAS QL manager.

    Translates DAS query strings into MongoDB query dicts via the PLY
    parser, optionally caching parse results in a parser DB, and resolves
    which DAS services/APIs can answer a given query.
    """
    def __init__(self, config=None):
        # Bootstrap the DAS configuration; note config is mutated in
        # place so shared config objects gain the mapping/analytics
        # helpers created here.
        if not config:
            config = das_readconfig()
        if not config.has_key('dasmapping'):
            config['dasmapping'] = DASMapping(config)
        if not config.has_key('dasanalytics'):
            config['dasanalytics'] = DASAnalytics(config)
        if not config['dasmapping'].check_maps():
            msg = "No DAS maps found in MappingDB"
            raise Exception(msg)
        self.map = config['dasmapping']
        self.analytics = config['dasanalytics']
        self.dasservices = config['services']
        self.daskeysmap = self.map.daskeys()
        self.operators = list(das_operators())
        # seed with special keys, then extend with every mapped DAS key
        self.daskeys = list(das_special_keys())
        self.verbose = config['verbose']
        self.logger = PrintManager('QLManger', self.verbose)
        for val in self.daskeysmap.values():
            for item in val:
                self.daskeys.append(item)
        parserdir = config['das']['parserdir']
        self.dasply = DASPLY(parserdir, self.daskeys, self.dasservices,
                             verbose=self.verbose)
        # optional parser-result cache DB
        self.enabledb = config['parserdb']['enable']
        if self.enabledb:
            self.parserdb = DASParserDB(config)

    def parse(self, query):
        """
        Parse input query and return query in MongoDB form.
        Optionally parsed query can be written into analytics DB.
        """
        mongo_query = self.mongo_query(query)
        self.convert2skeys(mongo_query)
        return mongo_query

    def add_to_analytics(self, query, mongo_query):
        "Add DAS query to analytics DB"
        self.analytics.add_query(query, mongo_query)

    def mongo_query(self, query):
        """
        Return mongo query for provided input query

        Raises Exception for unparseable queries, for results lacking
        'fields'/'spec', and for ambiguous queries (no selection fields
        with multiple spec conditions, or list-valued conditions).
        """
        # NOTE: somehow I need to keep build call just before using
        # PLY parser, otherwise it fails to parse.
        self.dasply.build()
        if self.verbose:
            msg = "input query='%s'" % query
            self.logger.debug(msg)
            self.dasply.test_lexer(query)
        if self.enabledb:
            # consult the parser cache first; queries matching
            # last_key_pattern bypass the cache (presumably because
            # "last"-style queries are time dependent — TODO confirm)
            status, value = self.parserdb.lookup_query(query)
            if status == PARSERCACHE_VALID and \
                len(last_key_pattern.findall(query)) == 0:
                mongo_query = value
            elif status == PARSERCACHE_INVALID:
                raise Exception(value)
            else:
                try:
                    ply_query = self.dasply.parser.parse(query)
                    mongo_query = ply2mongo(ply_query)
                    self.parserdb.insert_valid_query(query, mongo_query)
                except Exception as exp:
                    # remember the failure so repeated bad queries skip
                    # the parser next time
                    self.parserdb.insert_invalid_query(query, exp)
                    print "Input query=%s" % query
                    raise exp
        else:
            try:
                ply_query = self.dasply.parser.parse(query)
                mongo_query = ply2mongo(ply_query)
            except Exception as exc:
                msg = "Fail to convert input query='%s' into MongoDB format" \
                    % query
                print_exc(msg, print_traceback=False)
                raise exc
        # sanity checks on the resulting MongoDB query
        if set(mongo_query.keys()) & set(['fields', 'spec']) != \
            set(['fields', 'spec']):
            raise Exception('Invalid MongoDB query %s' % mongo_query)
        if not mongo_query['fields'] and len(mongo_query['spec'].keys()) > 1:
            raise Exception(ambiguous_msg(query, mongo_query['spec'].keys()))
        for key, val in mongo_query['spec'].iteritems():
            if isinstance(val, list):
                raise Exception(ambiguos_val_msg(query, key, val))
        return mongo_query

    def convert2skeys(self, mongo_query):
        """
        Convert DAS input keys into DAS selection keys.

        Mutates mongo_query['spec'] in place, replacing input keys by
        the mapped keys found via the mapping DB.
        """
        if not mongo_query['spec']:
            # empty spec: derive wildcard conditions from selection fields
            for key in mongo_query['fields']:
                for system in self.map.list_systems():
                    mapkey = self.map.find_mapkey(system, key)
                    if mapkey:
                        mongo_query['spec'][mapkey] = '*'
            return
        spec = mongo_query['spec']
        to_replace = []
        for key, val in spec.iteritems():
            for system in self.map.list_systems():
                mapkey = self.map.find_mapkey(system, key, val)
                if mapkey and mapkey != key and \
                    mongo_query['spec'].has_key(key):
                    to_replace.append((key, mapkey))
                    # NOTE(review): this `continue` is the last statement
                    # of the loop body and therefore a no-op; possibly a
                    # `break` (stop after first matching system) was
                    # intended — confirm before changing.
                    continue
        # apply replacements after iteration to avoid mutating the dict
        # while iterating it
        for key, mapkey in to_replace:
            if mongo_query['spec'].has_key(key):
                mongo_query['spec'][mapkey] = mongo_query['spec'][key]
                del mongo_query['spec'][key]

    def services(self, query):
        """Find out DAS services to use for provided query"""
        # query here is a MongoDB-style query dict (it is decompose'd and
        # read with .get), not a raw query string
        skeys, cond = decompose(query)
        if not skeys:
            skeys = []
        if isinstance(skeys, str):
            skeys = [skeys]
        slist = []
        # look-up services from Mapping DB
        for key in skeys + [i for i in cond.keys()]:
            for service, keys in self.daskeysmap.iteritems():
                if service not in self.dasservices:
                    continue
                value = cond.get(key, None)
                daskeys = self.map.find_daskey(service, key, value)
                if set(keys) & set(daskeys) and service not in slist:
                    slist.append(service)
        # look-up special key condition
        requested_system = query.get('system', None)
        if requested_system:
            # restrict to explicitly requested system(s)
            if isinstance(requested_system, str):
                requested_system = [requested_system]
            return list( set(slist) & set(requested_system) )
        return slist

    def service_apis_map(self, query):
        """
        Find out which APIs correspond to provided query.
        Return a map of found services and their apis.
        """
        skeys, cond = decompose(query)
        if not skeys:
            skeys = []
        if isinstance(skeys, str):
            skeys = [skeys]
        adict = {}
        # condition keys except special ones must all be covered by an API
        mapkeys = [key for key in cond.keys() if key not in das_special_keys()]
        services = self.services(query)
        for srv in services:
            alist = self.map.list_apis(srv)
            for api in alist:
                daskeys = self.map.api_info(api)['daskeys']
                maps = [r['map'] for r in daskeys]
                if set(mapkeys) & set(maps) == set(mapkeys):
                    if adict.has_key(srv):
                        new_list = adict[srv] + [api]
                        adict[srv] = list( set(new_list) )
                    else:
                        adict[srv] = [api]
        return adict

    def params(self, query):
        """
        Return dictionary of parameters to be used in DAS Core:
        selection keys, conditions and services.
        """
        skeys, cond = decompose(query)
        services = []
        # preserve order while dropping duplicates
        for srv in self.services(query):
            if srv not in services:
                services.append(srv)
        return dict(selkeys=skeys, conditions=cond, services=services)