def getRegex(command, sessionKey, username, namespace):
    """Return the recursively expanded syntax for ``command``.

    The stanza is first looked up as ``"<command>-command"`` in the
    "searchbnf" stanzas.  If that lookup raises, ``command`` is looked up
    in the "datatypesbnf" stanzas instead.

    Args:
        command: name of the command (or datatype) to expand.
        sessionKey, username, namespace: passed unchanged to
            ``utils.getStanzas``.

    Returns:
        Whatever ``regexRecurseSyntax`` returns; it is concatenated with a
        string for the debug output, so it is expected to be a string.

    Raises:
        Whatever the "datatypesbnf" lookup raises when ``command`` is found
        in neither collection.
    """
    stanzas = utils.getStanzas("searchbnf", sessionKey, username, namespace)
    datatypes = utils.getStanzas("datatypesbnf", sessionKey, username, namespace)
    try:
        stanza = stanzas[command + "-command"]
    except Exception:
        # Fall back to the datatype definitions.  ``Exception`` rather than
        # a bare ``except`` so KeyboardInterrupt/SystemExit still propagate.
        stanza = datatypes[command]
    # The trailing 10 is presumably a recursion limit -- confirm against
    # regexRecurseSyntax.
    syntax = regexRecurseSyntax(stanzas, stanza, datatypes, 10)
    printd("Original Syntax:" + str(stanza["syntax"]))
    printd("Recursed:" + syntax)
    return syntax
def getRegex(command, sessionKey, username, namespace):
    """Return the recursively expanded syntax for ``command``.

    The stanza is first looked up as ``"<command>-command"`` in the
    "searchbnf" stanzas.  If that lookup raises, ``command`` is looked up
    in the "datatypesbnf" stanzas instead.

    Args:
        command: name of the command (or datatype) to expand.
        sessionKey, username, namespace: passed unchanged to
            ``utils.getStanzas``.

    Returns:
        Whatever ``regexRecurseSyntax`` returns; it is concatenated with a
        string for the debug output, so it is expected to be a string.

    Raises:
        Whatever the "datatypesbnf" lookup raises when ``command`` is found
        in neither collection.
    """
    stanzas = utils.getStanzas("searchbnf", sessionKey, username, namespace)
    datatypes = utils.getStanzas("datatypesbnf", sessionKey, username, namespace)
    try:
        stanza = stanzas[command + "-command"]
    except Exception:
        # Fall back to the datatype definitions.  ``Exception`` rather than
        # a bare ``except`` so KeyboardInterrupt/SystemExit still propagate.
        stanza = datatypes[command]
    # The trailing 10 is presumably a recursion limit -- confirm against
    # regexRecurseSyntax.
    syntax = regexRecurseSyntax(stanzas, stanza, datatypes, 10)
    printd("Original Syntax:" + str(stanza["syntax"]))
    printd("Recursed:" + syntax)
    return syntax
def getBNF(command, sessionKey, username, namespace):
    """Return the expanded BNF syntax for ``command``.

    The stanza is first looked up as ``"<command>-command"`` in the
    "searchbnf" stanzas.  If that lookup raises, ``command`` is looked up
    in the "datatypesbnf" stanzas instead.

    Args:
        command: name of the command (or datatype) to describe.
        sessionKey, username, namespace: passed unchanged to
            ``utils.getStanzas``.

    Returns:
        Whatever ``describer.recurseSyntax`` returns; it is concatenated
        with a string for the debug output, so it is expected to be a
        string.

    Raises:
        Whatever the "datatypesbnf" lookup raises when ``command`` is found
        in neither collection.
    """
    stanzas = utils.getStanzas("searchbnf", sessionKey, username, namespace)
    datatypes = utils.getStanzas("datatypesbnf", sessionKey, username, namespace)
    try:
        stanza = stanzas[command + "-command"]
    except Exception:
        # Fall back to the datatype definitions.  ``Exception`` rather than
        # a bare ``except`` so KeyboardInterrupt/SystemExit still propagate.
        stanza = datatypes[command]
    # An empty list, not the "datatypesbnf" stanzas, is handed to
    # recurseSyntax; the stanzas above serve only the fallback lookup.
    # NOTE(review): assumes the original reset ``datatypes`` after the
    # try/except rather than inside the handler -- confirm.
    syntax = describer.recurseSyntax(command, stanzas, stanza, [])
    printd("Original Syntax:" + str(stanza["syntax"]))
    printd("Recursed:" + syntax)
    return syntax
def didYouMeanFields(sessionKey, username, namespace, bnf, search):
    """Suggest known field names for unknown ``name=`` terms in ``search``.

    Field names are taken from the keys of the "fields" stanzas and compared
    case-insensitively.  Terms are examined from the end of the search
    backwards, and collection stops once
    ``MAX_FIELD_SUGGESTIONS_PER_SEARCH`` suggestions have been gathered.

    Args:
        sessionKey, username, namespace: passed unchanged to
            ``utils.getStanzas``.
        bnf: unused; kept so the signature stays compatible with callers.
        search: the search string to scan for ``name=`` terms.

    Returns:
        A list of suggestion strings, one per unknown field that has at
        least one close match, each produced by ``formatSuggestions``.
    """
    fieldStanzas = utils.getStanzas("fields", sessionKey, username, namespace)
    knownFields = [field.lower() for field in fieldStanzas.stanzas.keys()]
    # Set for O(1) membership tests; the list is still what difflib scans.
    knownFieldSet = set(knownFields)

    # Keep a list (not a set) so the order of terms in the search survives.
    searchFields = re.findall("([a-zA-Z0-9-_]+)=", search.lower())

    examined = set()  # unknown fields already scored, with or without matches
    suggestions = []
    # Show suggestions for the last n incorrect fields.
    for field in reversed(searchFields):
        if len(suggestions) >= MAX_FIELD_SUGGESTIONS_PER_SEARCH:
            break
        if field in knownFieldSet or field in examined:
            continue
        # Recording every examined field avoids re-scoring duplicates that
        # had no close match; the returned suggestions are unaffected.
        examined.add(field)
        fieldmatches = difflib.get_close_matches(
            field, knownFields, n=MAX_SUGGESTIONS_PER_FIELD, cutoff=QUALITY_MATCH)
        if fieldmatches:
            suggestions.append("%s" % (formatSuggestions(fieldmatches, "field")))
    return suggestions
def getBNF(command, sessionKey, username, namespace):
    """Return the expanded BNF syntax for ``command``.

    The stanza is first looked up as ``"<command>-command"`` in the
    "searchbnf" stanzas.  If that lookup raises, ``command`` is looked up
    in the "datatypesbnf" stanzas instead.

    Args:
        command: name of the command (or datatype) to describe.
        sessionKey, username, namespace: passed unchanged to
            ``utils.getStanzas``.

    Returns:
        Whatever ``describer.recurseSyntax`` returns; it is concatenated
        with a string for the debug output, so it is expected to be a
        string.

    Raises:
        Whatever the "datatypesbnf" lookup raises when ``command`` is found
        in neither collection.
    """
    stanzas = utils.getStanzas("searchbnf", sessionKey, username, namespace)
    datatypes = utils.getStanzas("datatypesbnf", sessionKey, username, namespace)
    try:
        stanza = stanzas[command + "-command"]
    except Exception:
        # Fall back to the datatype definitions.  ``Exception`` rather than
        # a bare ``except`` so KeyboardInterrupt/SystemExit still propagate.
        stanza = datatypes[command]
    # An empty list, not the "datatypesbnf" stanzas, is handed to
    # recurseSyntax; the stanzas above serve only the fallback lookup.
    # NOTE(review): assumes the original reset ``datatypes`` after the
    # try/except rather than inside the handler -- confirm.
    syntax = describer.recurseSyntax(command, stanzas, stanza, [])
    printd("Original Syntax:" + str(stanza["syntax"]))
    printd("Recursed:" + syntax)
    return syntax
def didYouMeanFields(sessionKey, username, namespace, bnf, search):
    """Suggest known field names for unknown ``name=`` terms in ``search``.

    Field names are taken from the keys of the "fields" stanzas and compared
    case-insensitively.  Each distinct unknown term is scored once, in order
    of first appearance in the search.

    Args:
        sessionKey, username, namespace: passed unchanged to
            ``utils.getStanzas``.
        bnf: unused; kept so the signature stays compatible with callers.
        search: the search string to scan for ``name=`` terms.

    Returns:
        A list of suggestion strings, one per unknown field that has at
        least one close match, each produced by ``formatSuggestions``.
    """
    fieldStanzas = utils.getStanzas("fields", sessionKey, username, namespace)
    knownFields = [field.lower() for field in fieldStanzas.stanzas.keys()]
    # Set for O(1) membership tests; the list is still what difflib scans.
    knownFieldSet = set(knownFields)

    examined = set()
    suggestions = []
    # Dedupe while walking the matches in order.  Iterating a set here made
    # the order of the returned suggestions depend on string hash order.
    for field in re.findall("([a-zA-Z0-9-_]+)=", search.lower()):
        if field in examined or field in knownFieldSet:
            continue
        examined.add(field)
        fieldmatches = difflib.get_close_matches(field, knownFields, cutoff=QUALITY_MATCH)
        if fieldmatches:
            suggestions.append("%s" % (formatSuggestions(fieldmatches, "field")))
    return suggestions
def relatedSearches(output, sessionKey, namespace, user, search):
    """Record saved searches that are similar to ``search``.

    Saved searches are compared case-insensitively with
    ``difflib.get_close_matches`` (cutoff 0.65).  When at least one close
    match exists, ``output['savedsearches']`` is replaced with a list of
    ``(name, search)`` tuples, leaving out any saved search whose text
    equals ``search`` ignoring case.  When nothing matches, ``output`` is
    left untouched.

    Args:
        output: dict that receives the 'savedsearches' entry.
        sessionKey, namespace, user: passed to ``utils.getStanzas``.
        search: the search string to compare against.

    Returns:
        None; the result is written into ``output``.
    """
    savedsearches = utils.getStanzas("savedsearches", sessionKey, user, namespace)

    # Lower-cased search text -> (saved search name, original search text).
    searchmap = {}
    for name in savedsearches:
        ssearch = savedsearches[name].get('search', None)
        if ssearch is not None:
            searchmap[ssearch.lower()] = (name, ssearch)

    needle = search.lower()
    bestmatches = difflib.get_close_matches(needle, list(searchmap), cutoff=0.65)
    if not bestmatches:
        return

    # Match keys are lower-cased, so compare with the lower-cased search.
    # Comparing with the raw ``search`` let the user's own search through
    # whenever it contained an upper-case character.
    output['savedsearches'] = [searchmap[match] for match in bestmatches if match != needle]
def _main():
    """Command-line test driver.

    With an argument, the argument is split by ``utils.getCommands`` and the
    raw result is printed, followed by the stripped command names and
    argument strings.  Without arguments, the test session and namespace
    from ``utils`` are used and each entry of the first value returned by
    ``getNextData`` is printed on its own tab-indented line.
    """
    if len(sys.argv) > 1:
        search = sys.argv[1]
        cmds = utils.getCommands(search, None)
        # As consumed here, ``cmds`` is an iterable of iterables of
        # (command, args) pairs.  The loop variable no longer reuses the
        # name ``search``, which Python 2 comprehensions leak into the
        # enclosing scope.
        comms = [c.strip() for group in cmds for c, a in group]
        args = [a.strip() for group in cmds for c, a in group]
        # Single-argument print(...) is valid on Python 2 and 3 and emits
        # the same text as the former print statements.
        print("Commands: %s" % (cmds,))
        print("Commands: %s Args: %s" % (comms, args))
    else:
        user = "******"
        sessionKey = utils.TEST_SESSION()
        namespace = utils.TEST_NAMESPACE()
        bnf = utils.getStanzas("searchbnf", sessionKey, user, namespace)
        data, searches = getNextData(user, bnf, sessionKey, namespace)
        for cmd in data:
            print("\t%s" % cmd)
def didYouMeanFields(sessionKey, username, namespace, bnf, search):
    """Suggest known field names for unknown ``name=`` terms in ``search``.

    Field names are taken from the keys of the "fields" stanzas and compared
    case-insensitively.  Terms are examined from the end of the search
    backwards, and collection stops once
    ``MAX_FIELD_SUGGESTIONS_PER_SEARCH`` suggestions have been gathered.

    Args:
        sessionKey, username, namespace: passed unchanged to
            ``utils.getStanzas``.
        bnf: unused; kept so the signature stays compatible with callers.
        search: the search string to scan for ``name=`` terms.

    Returns:
        A list of suggestion strings, one per unknown field that has at
        least one close match, each produced by ``formatSuggestions``.
    """
    fieldStanzas = utils.getStanzas("fields", sessionKey, username, namespace)
    knownFields = [field.lower() for field in fieldStanzas.stanzas.keys()]
    # Set for O(1) membership tests; the list is still what difflib scans.
    knownFieldSet = set(knownFields)

    # Keep a list (not a set) so the order of terms in the search survives.
    searchFields = re.findall("([a-zA-Z0-9-_]+)=", search.lower())

    examined = set()  # unknown fields already scored, with or without matches
    suggestions = []
    # Show suggestions for the last n incorrect fields.
    for field in reversed(searchFields):
        if len(suggestions) >= MAX_FIELD_SUGGESTIONS_PER_SEARCH:
            break
        if field in knownFieldSet or field in examined:
            continue
        # Recording every examined field avoids re-scoring duplicates that
        # had no close match; the returned suggestions are unaffected.
        examined.add(field)
        fieldmatches = difflib.get_close_matches(
            field, knownFields, n=MAX_SUGGESTIONS_PER_FIELD, cutoff=QUALITY_MATCH)
        if fieldmatches:
            suggestions.append("%s" % (formatSuggestions(fieldmatches, "field")))
    return suggestions
def _main():
    """Command-line test driver for the BNF parser.

    Usage, as implemented below:

    * ``<command> <input>`` (exactly two arguments, first not "parse"):
      look up the BNF for ``<command>``, parse it, and print the BNF, the
      parsed expression, the result of ``getNext`` for ``<input>``, and
      both regex renderings.
    * ``<anything> <bnf> [<input>]`` (all other calls with two or more
      arguments): parse ``<bnf>`` directly and print its minimum match
      length followed by the BNF text.
    * otherwise: call ``usage()``.
    """
    argv = sys.argv
    argc = len(argv)
    sessionKey = utils.TEST_SESSION()
    namespace = utils.TEST_NAMESPACE()
    username = '******'
    if argc < 2:
        usage()
        # If usage() returns instead of exiting, argv[1] below would raise
        # IndexError; stop here.
        return
    cmd = argv[1]
    if argc == 3 and cmd != "parse":
        inputtxt = argv[2]
        bnf = getBNF(cmd, sessionKey, username, namespace)
        exp = getExp(bnf)
        # Named ``nextItems`` so the builtin ``next`` is not shadowed.
        nextItems = getNext(exp, inputtxt)
        datatypes = utils.getStanzas("datatypesbnf", sessionKey, username, namespace)
        # Single-argument print(...) is valid on Python 2 and 3.  The Python
        # 2 print statement adds no space after an item ending in a tab, so
        # these emit the same text as before.
        print("bnf:\t%s" % (bnf,))
        print("exp:\t%s" % (exp,))
        print("next:\t%s" % (nextItems,))
        print("regex:\t%s" % (exp.toRegex(datatypes),))
        print("Simpleregex:\t%s" % (exp.toSimpleRegex(True),))
    elif argc >= 3:
        bnf = argv[2]
        inputtxt = ""
        if argc == 4:
            inputtxt = argv[3]
        exp = getExp(bnf)
        # Result is unused; the call is kept in case getNext validates its
        # input or has side effects.
        getNext(exp, inputtxt)
        print("%s\t%s" % (exp.minMatchLen(), bnf))
    else:
        usage()
def _main():
    """Command-line test driver for the BNF parser.

    Usage, as implemented below:

    * ``<command> <input>`` (exactly two arguments, first not "parse"):
      look up the BNF for ``<command>``, parse it, and print the BNF, the
      parsed expression, the result of ``getNext`` for ``<input>``, and
      both regex renderings.
    * ``<anything> <bnf> [<input>]`` (all other calls with two or more
      arguments): parse ``<bnf>`` directly and print its minimum match
      length followed by the BNF text.
    * otherwise: call ``usage()``.
    """
    argv = sys.argv
    argc = len(argv)
    sessionKey = utils.TEST_SESSION()
    namespace = utils.TEST_NAMESPACE()
    username = '******'
    if argc < 2:
        usage()
        # If usage() returns instead of exiting, argv[1] below would raise
        # IndexError; stop here.
        return
    cmd = argv[1]
    if argc == 3 and cmd != "parse":
        inputtxt = argv[2]
        bnf = getBNF(cmd, sessionKey, username, namespace)
        exp = getExp(bnf)
        # Named ``nextItems`` so the builtin ``next`` is not shadowed.
        nextItems = getNext(exp, inputtxt)
        datatypes = utils.getStanzas("datatypesbnf", sessionKey, username, namespace)
        # Single-argument print(...) is valid on Python 2 and 3.  The Python
        # 2 print statement adds no space after an item ending in a tab, so
        # these emit the same text as before.
        print("bnf:\t%s" % (bnf,))
        print("exp:\t%s" % (exp,))
        print("next:\t%s" % (nextItems,))
        print("regex:\t%s" % (exp.toRegex(datatypes),))
        print("Simpleregex:\t%s" % (exp.toSimpleRegex(True),))
    elif argc >= 3:
        bnf = argv[2]
        inputtxt = ""
        if argc == 4:
            inputtxt = argv[3]
        exp = getExp(bnf)
        # Result is unused; the call is kept in case getNext validates its
        # input or has side effects.
        getNext(exp, inputtxt)
        print("%s\t%s" % (exp.minMatchLen(), bnf))
    else:
        usage()
def doHelp(sessionKey, namespace, user, search, insertpos=None, earliest_time=None,
           latest_time=None, count=10, max_time=None, servers=None, useTypeahead=False,
           showCommandHelp=True, showCommandHistory=True, showFieldInfo=True):
    """Gather search-assistant help for ``search`` and return it as a dict.

    Every helper called below receives the shared ``output`` dict and
    records its findings there.  The kinds of help this is meant to offer
    (from the original notes):

        "did you mean ___?"
        "did you know ___?"
        "the 'sort' operator takes blah arguments and does blah"
        "you might also be interested in ___?"
        "the fields ___ can help narrow does these results"
        "these past searches are similar to your search"
        "these saved searches are similar to your search"
        "you are searching for ip and host and then deduplicating by host"
        "your search would be faster if you ..."

    Args:
        sessionKey, namespace, user: passed through to the helpers.
        search: the search text as typed by the user.
        insertpos: cursor position; only ``search[:insertpos]`` is analysed.
            ``None`` or a value ``int()`` rejects means "end of search".
        earliest_time, latest_time, count, max_time, servers: passed to
            ``suggestSearchTypeahead``.
        useTypeahead: when true, run ``suggestSearchTypeahead``.
        showCommandHelp: when true, run ``commandHelp``.
        showCommandHistory: passed on to ``nextCommand``.
        showFieldInfo: when true, run ``fieldInfo.usefulFields``.
            The four flags are interpreted by
            ``splunk.util.normalizeBoolean``.

    Returns:
        The ``output`` dict.  If any helper raises an ``Exception``, the
        error text and traceback are inserted at the front of
        ``output['notices']`` and logged, and the partially filled dict is
        still returned.
    """
    originalsearch = search
    if insertpos is None:
        # No insertion point given: use the end of the search.
        insertpos = len(search)
    else:
        try:
            insertpos = int(insertpos)
        except (TypeError, ValueError, OverflowError):
            insertpos = len(search)

    # The helpers expect a search that starts with a pipe.
    search = search[:insertpos].strip()
    if search == "":
        search = "| search"
    elif not search.startswith("|"):
        search = "| " + search

    # The user's query is the full original text minus a leading "search ".
    usersquery = originalsearch
    if usersquery.startswith("search "):
        usersquery = usersquery[len("search "):]
    queryprefix = utils.allButLast(usersquery)

    # defaults
    output = {
        'notices': [],
        'fields': [],
        'args': [],
        'nexts': [],
        'autonexts': [],
        'autocomplete': [],
        'autocomplete_match': '',
        'command': {},
        'typeahead': [],
        'search': usersquery,
        'searchprefix': queryprefix,
        'allcommands': [],
        'savedsearches': [],
        'arg_typeahead': [],
        'has_field_args': False,
    }

    try:
        bnf = utils.getStanzas("searchbnf", sessionKey, user, namespace)
        output['allcommands'] = utils.getAllCommands(bnf, user, namespace)
        aliasMap = utils.getAliasMap(bnf)

        if splunk.util.normalizeBoolean(useTypeahead):
            suggestSearchTypeahead(output, search, usersquery, count, max_time,
                                   earliest_time, latest_time, servers, namespace, user)
        firstTermShouldBeCommand(output, search, aliasMap)
        didYouMean.help(output, bnf, sessionKey, namespace, user, search, usersquery)
        didYouKnow.help(output, aliasMap, user, search)
        relatedPastSearches(output, user, search)
        relatedSearches(output, sessionKey, namespace, user, search)
        if splunk.util.normalizeBoolean(showCommandHelp):
            commandHelp(output, user, search, aliasMap, bnf)
        nextCommand(output, sessionKey, namespace, user, search, usersquery, queryprefix,
                    aliasMap, bnf, splunk.util.normalizeBoolean(showCommandHistory))
        relatedTerms(output, user, search)
        if splunk.util.normalizeBoolean(showFieldInfo):
            fieldInfo.usefulFields(output, sessionKey, namespace, user, usersquery)
        describeSearch(output, user, search)
        suggestOptimizations(output, user, search)
        argTypeahead(output, sessionKey, namespace, user, bnf, search)
    except Exception as e:
        # Best effort: surface the failure as the first notice instead of
        # failing the whole request.
        msg = "! Error in search assistant: %s" % e
        msg += traceback.format_exc()
        output['notices'].insert(0, msg)
        logger.error(msg)

    # Without this return the collected help was discarded.
    return output
def doHelp(sessionKey, namespace, user, search, insertpos=None, earliest_time=None,
           latest_time=None, count=10, max_time=None, servers=None, useTypeahead=False,
           showCommandHelp=True, showCommandHistory=True, showFieldInfo=True):
    """Gather search-assistant help for ``search`` and return it as a dict.

    Every helper called below receives the shared ``output`` dict and
    records its findings there.  The kinds of help this is meant to offer
    (from the original notes):

        "did you mean ___?"
        "did you know ___?"
        "the 'sort' operator takes blah arguments and does blah"
        "you might also be interested in ___?"
        "the fields ___ can help narrow does these results"
        "these past searches are similar to your search"
        "these saved searches are similar to your search"
        "you are searching for ip and host and then deduplicating by host"
        "your search would be faster if you ..."

    Args:
        sessionKey, namespace, user: passed through to the helpers.
        search: the search text as typed by the user.
        insertpos: cursor position; only ``search[:insertpos]`` is analysed.
            ``None`` or a value ``int()`` rejects means "end of search".
        earliest_time, latest_time, count, max_time, servers: passed to
            ``suggestSearchTypeahead``.
        useTypeahead: when true, run ``suggestSearchTypeahead``.
        showCommandHelp: when true, run ``commandHelp``.
        showCommandHistory: passed on to ``nextCommand``.
        showFieldInfo: when true, run ``fieldInfo.usefulFields``.
            The four flags are interpreted by
            ``splunk.util.normalizeBoolean``.

    Returns:
        The ``output`` dict.  If any helper raises an ``Exception``, the
        error text and traceback are inserted at the front of
        ``output['notices']`` and logged, and the partially filled dict is
        still returned.
    """
    originalsearch = search
    if insertpos is None:
        # No insertion point given: use the end of the search.
        insertpos = len(search)
    else:
        try:
            insertpos = int(insertpos)
        except (TypeError, ValueError, OverflowError):
            insertpos = len(search)

    # The helpers expect a search that starts with a pipe.
    search = search[:insertpos].strip()
    if search == "":
        search = "| search"
    elif not search.startswith("|"):
        search = "| " + search

    # The user's query is the full original text minus a leading "search ".
    usersquery = originalsearch
    if usersquery.startswith("search "):
        usersquery = usersquery[len("search "):]
    queryprefix = utils.allButLast(usersquery)

    # defaults
    output = {
        'notices': [],
        'fields': [],
        'args': [],
        'nexts': [],
        'autonexts': [],
        'autocomplete': [],
        'autocomplete_match': '',
        'command': {},
        'typeahead': [],
        'search': usersquery,
        'searchprefix': queryprefix,
        'allcommands': [],
        'savedsearches': [],
        'arg_typeahead': [],
        'has_field_args': False,
    }

    try:
        bnf = utils.getStanzas("searchbnf", sessionKey, user, namespace)
        output['allcommands'] = utils.getAllCommands(bnf, user, namespace)
        aliasMap = utils.getAliasMap(bnf)

        if splunk.util.normalizeBoolean(useTypeahead):
            suggestSearchTypeahead(output, search, usersquery, count, max_time,
                                   earliest_time, latest_time, servers, namespace, user)
        firstTermShouldBeCommand(output, search, aliasMap)
        didYouMean.help(output, bnf, sessionKey, namespace, user, search, usersquery)
        didYouKnow.help(output, aliasMap, user, search)
        relatedPastSearches(output, user, search)
        relatedSearches(output, sessionKey, namespace, user, search)
        if splunk.util.normalizeBoolean(showCommandHelp):
            commandHelp(output, user, search, aliasMap, bnf)
        nextCommand(output, sessionKey, namespace, user, search, usersquery, queryprefix,
                    aliasMap, bnf, splunk.util.normalizeBoolean(showCommandHistory))
        relatedTerms(output, user, search)
        if splunk.util.normalizeBoolean(showFieldInfo):
            fieldInfo.usefulFields(output, sessionKey, namespace, user, usersquery)
        describeSearch(output, user, search)
        suggestOptimizations(output, user, search)
        argTypeahead(output, sessionKey, namespace, user, bnf, search)
    except Exception as e:
        # Best effort: surface the failure as the first notice instead of
        # failing the whole request.
        msg = "! Error in search assistant: %s" % e
        msg += traceback.format_exc()
        output['notices'].insert(0, msg)
        logger.error(msg)

    # Without this return the collected help was discarded.
    return output