Exemplo n.º 1
0
def getRegex(command, sessionKey, username, namespace):
    """Return the recursively expanded syntax for ``command``.

    The stanza is looked up as ``<command>-command`` in the "searchbnf"
    stanzas; if that lookup fails, ``command`` itself is looked up in the
    "datatypesbnf" stanzas.  The stanza is then expanded with
    ``regexRecurseSyntax``.

    Both the stanza's raw ``syntax`` entry and the expanded result are
    passed to ``printd``.  The expanded result is returned; it is
    concatenated with a string below, so it is expected to be a string.
    """
    stanzas = utils.getStanzas("searchbnf", sessionKey, username, namespace)
    datatypes = utils.getStanzas("datatypesbnf", sessionKey, username, namespace)
    try:
        stanza = stanzas[command + "-command"]
    except Exception:
        # Catch Exception rather than everything so KeyboardInterrupt and
        # SystemExit still propagate.  The exception type raised for a
        # missing stanza depends on what utils.getStanzas returns, so it is
        # deliberately not narrowed any further.
        stanza = datatypes[command]

    # NOTE(review): the literal 10 is presumably a recursion limit --
    # confirm against regexRecurseSyntax.
    syntax = regexRecurseSyntax(stanzas, stanza, datatypes, 10)
    printd("Original Syntax:" + str(stanza["syntax"]))
    printd("Recursed:" + syntax)
    return syntax
def getRegex(command, sessionKey, username, namespace):
    """Look up the BNF stanza for ``command`` and return its expanded syntax.

    Lookup order: ``<command>-command`` in the "searchbnf" stanzas, then
    (only if that lookup raises) ``command`` in the "datatypesbnf" stanzas.
    The chosen stanza is expanded by ``regexRecurseSyntax`` and the result
    is returned.  The raw ``syntax`` entry and the expanded form are both
    handed to ``printd``.
    """
    stanzas = utils.getStanzas("searchbnf", sessionKey, username, namespace)
    datatypes = utils.getStanzas("datatypesbnf", sessionKey, username,
                                 namespace)
    try:
        stanza = stanzas[command + "-command"]
    except Exception:
        # Not a bare ``except:`` -- KeyboardInterrupt / SystemExit must not
        # be swallowed by the fallback.  Kept broad because the lookup
        # failure type of the stanza container is not visible here.
        stanza = datatypes[command]

    # NOTE(review): 10 looks like a maximum recursion depth -- confirm.
    syntax = regexRecurseSyntax(stanzas, stanza, datatypes, 10)
    printd("Original Syntax:" + str(stanza["syntax"]))
    printd("Recursed:" + syntax)
    return syntax
Exemplo n.º 3
0
def getBNF(command, sessionKey, username, namespace):
    """Return the recursively expanded BNF syntax for ``command``.

    The stanza is taken from the "searchbnf" stanzas under the key
    ``<command>-command``; if that lookup fails, ``command`` is looked up in
    the "datatypesbnf" stanzas.  Expansion is delegated to
    ``describer.recurseSyntax``, whose result is returned (it is
    concatenated with a string below, so it is expected to be a string).
    """
    stanzas = utils.getStanzas("searchbnf", sessionKey, username, namespace)
    datatypeStanzas = utils.getStanzas("datatypesbnf", sessionKey, username, namespace)
    try:
        stanza = stanzas[command + "-command"]
    except Exception:
        # Catch Exception, not everything: KeyboardInterrupt / SystemExit
        # should still propagate.  The failure type of the stanza lookup is
        # not visible from here, so it is not narrowed further.
        stanza = datatypeStanzas[command]

    # recurseSyntax is handed a fresh empty list, NOT the datatypesbnf
    # stanzas fetched above (presumably an accumulator -- confirm against
    # describer.recurseSyntax).  A separate name avoids rebinding one
    # variable to two unrelated things.
    expansionDatatypes = []
    syntax = describer.recurseSyntax(command, stanzas, stanza, expansionDatatypes)
    printd("Original Syntax:" + str(stanza["syntax"]))
    printd("Recursed:" + syntax)
    return syntax
def didYouMeanFields(sessionKey, username, namespace, bnf, search):
    """Suggest known field names for unrecognised ``name=`` terms in ``search``.

    Field names are compared case-insensitively against the stanza names of
    the "fields" configuration.  The search is scanned from its last
    ``name=`` term backwards, and scanning stops once
    MAX_FIELD_SUGGESTIONS_PER_SEARCH suggestions have been collected.

    ``bnf`` is accepted for interface compatibility and is not used.

    Returns a list of formatted suggestion strings (possibly empty).
    """
    knownFields = [
        field.lower()
        for field in utils.getStanzas("fields", sessionKey, username,
                                      namespace).stanzas.keys()
    ]
    # Set for O(1) membership tests; the list is still what difflib sees, so
    # the ranking of close matches is unaffected.
    knownFieldSet = set(knownFields)

    # preserve the order, do not make a set
    searchFields = re.findall("([a-zA-Z0-9-_]+)=", search.lower())

    # Every field already examined, whether or not it produced a suggestion.
    # A repeated field would yield the same difflib result, so it is skipped
    # instead of being matched again.
    examined = set()
    suggestions = []
    # for the last n incorrect fields show suggestions
    for field in reversed(searchFields):
        if len(suggestions) >= MAX_FIELD_SUGGESTIONS_PER_SEARCH:
            break
        if field in examined:
            continue
        examined.add(field)

        if field in knownFieldSet:
            continue

        fieldmatches = difflib.get_close_matches(
            field,
            knownFields,
            n=MAX_SUGGESTIONS_PER_FIELD,
            cutoff=QUALITY_MATCH)
        if fieldmatches:
            suggestions.append("%s" %
                               (formatSuggestions(fieldmatches, "field")))
    return suggestions
def getBNF(command, sessionKey, username, namespace):
    """Fetch the BNF stanza for ``command`` and return its expanded syntax.

    Lookup order: ``<command>-command`` in the "searchbnf" stanzas, then
    (only if that lookup raises) ``command`` in the "datatypesbnf" stanzas.
    The stanza is expanded with ``describer.recurseSyntax`` and the result
    is returned.  The raw ``syntax`` entry and the expanded form are both
    passed to ``printd``.
    """
    stanzas = utils.getStanzas("searchbnf", sessionKey, username, namespace)
    datatypeStanzas = utils.getStanzas("datatypesbnf", sessionKey, username,
                                       namespace)
    try:
        stanza = stanzas[command + "-command"]
    except Exception:
        # Broad on purpose (the lookup's failure type is not visible here),
        # but no longer a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit are not swallowed.
        stanza = datatypeStanzas[command]

    # An empty list -- not the datatypesbnf stanzas -- is what gets passed
    # to recurseSyntax (presumably filled in during recursion; confirm).
    # It has its own name so the stanzas variable is not rebound.
    expansionDatatypes = []
    syntax = describer.recurseSyntax(command, stanzas, stanza,
                                     expansionDatatypes)
    printd("Original Syntax:" + str(stanza["syntax"]))
    printd("Recursed:" + syntax)
    return syntax
Exemplo n.º 6
0
def didYouMeanFields(sessionKey, username, namespace, bnf, search):
    """Suggest known field names for unrecognised ``name=`` terms in ``search``.

    Every distinct ``name=`` term (lower-cased) that is not a stanza name of
    the "fields" configuration is matched against the known names with
    difflib; each term with at least one close match contributes one
    formatted suggestion.  Terms are visited in set-iteration order.

    ``bnf`` is not used.  Returns a list of suggestion strings.
    """
    fieldsConf = utils.getStanzas("fields", sessionKey, username, namespace)
    knownFields = [name.lower() for name in fieldsConf.stanzas.keys()]
    candidates = set(re.findall("([a-zA-Z0-9-_]+)=", search.lower()))

    suggestions = []
    for candidate in candidates:
        if candidate in knownFields:
            # Already a recognised field; nothing to suggest.
            continue
        closest = difflib.get_close_matches(candidate, knownFields, cutoff=QUALITY_MATCH)
        if len(closest) == 0:
            continue
        suggestions.append("%s" % (formatSuggestions(closest, "field")))
    return suggestions
def relatedSearches(output, sessionKey, namespace, user, search):
    """these saved searches are similar to your search

    Compares ``search`` case-insensitively against the ``search`` value of
    every "savedsearches" stanza and stores the close matches (difflib
    cutoff 0.65) in ``output['savedsearches']`` as ``(name, search)``
    tuples.  A saved search whose text equals ``search`` ignoring case is
    left out.

    ``output`` is modified in place; nothing is returned.  When there are no
    close matches at all, ``output`` is left untouched.
    """
    savedsearches = utils.getStanzas("savedsearches", sessionKey, user, namespace)
    loweredSearch = search.lower()

    # lower-cased search text -> (stanza name, original search text).  Saved
    # searches that differ only in case collapse to one entry (last wins).
    searchmap = {}
    for name in savedsearches:
        ssearch = savedsearches[name].get('search', None)
        if ssearch is not None:
            searchmap[ssearch.lower()] = (name, ssearch)

    bestmatches = difflib.get_close_matches(loweredSearch, list(searchmap), cutoff=0.65)
    if not bestmatches:
        return

    # The candidates are lower-cased, so the "same search" exclusion must
    # compare against the lower-cased query as well; comparing against the
    # raw text let an identical search through whenever it contained
    # upper-case characters.
    output['savedsearches'] = [searchmap[match] for match in bestmatches if match != loweredSearch]
Exemplo n.º 8
0
def relatedSearches(output, sessionKey, namespace, user, search):
    """these saved searches are similar to your search

    Finds saved searches whose text closely resembles ``search`` (difflib,
    cutoff 0.65, case-insensitive) and records them in
    ``output['savedsearches']`` as ``(name, search)`` tuples, skipping any
    saved search that equals ``search`` ignoring case.

    Mutates ``output`` and returns None.  ``output`` is not touched when
    difflib reports no close match.
    """
    savedsearches = utils.getStanzas("savedsearches", sessionKey, user, namespace)
    query = search.lower()

    # Key: lower-cased saved-search text; value: (stanza name, original text).
    searchmap = {}
    for name in savedsearches:
        ssearch = savedsearches[name].get('search', None)
        if ssearch is None:
            continue
        searchmap[ssearch.lower()] = (name, ssearch)

    bestmatches = difflib.get_close_matches(query, list(searchmap), cutoff=0.65)
    if not bestmatches:
        return

    # Matches are lower-cased keys, so they are compared with the lower-cased
    # query.  Comparing with the raw ``search`` failed to exclude an
    # identical saved search when the query had upper-case characters.
    output['savedsearches'] = [searchmap[match] for match in bestmatches if match != query]
Exemplo n.º 9
0
def _main():
    if len(sys.argv) > 1:
        search = sys.argv[1]
        cmds = utils.getCommands(search, None)
        comms = [c.strip() for search in cmds for c,a in search ]
        args = [a.strip() for search in cmds for c,a in search ]
        print "Commands:", cmds
        print "Commands: %s  Args: %s" % (comms, args)
    else:
        user = "******"
        sessionKey = utils.TEST_SESSION()
        namespace = utils.TEST_NAMESPACE()
        #print getPastSearches(user, None, sessionKey, namespace)
        bnf = utils.getStanzas("searchbnf", sessionKey, user, namespace)
        data, searches = getNextData(user, bnf, sessionKey, namespace)
        for cmd in data:
            print "\t%s" % cmd
Exemplo n.º 10
0
def didYouMeanFields(sessionKey, username, namespace, bnf, search):
    """Return "did you mean" suggestions for unknown fields in ``search``.

    The distinct, lower-cased ``name=`` terms of the search are checked
    against the lower-cased stanza names of the "fields" configuration.
    Each unknown term that difflib can match closely (cutoff QUALITY_MATCH)
    adds one string produced by ``formatSuggestions``.  Iteration follows
    set order.  ``bnf`` is unused.
    """
    stanzaNames = utils.getStanzas("fields", sessionKey, username,
                                   namespace).stanzas.keys()
    knownFields = [stanzaName.lower() for stanzaName in stanzaNames]
    searchFields = set(re.findall("([a-zA-Z0-9-_]+)=", search.lower()))

    suggestions = []
    for term in searchFields:
        isUnknown = term not in knownFields
        if isUnknown:
            nearby = difflib.get_close_matches(term,
                                               knownFields,
                                               cutoff=QUALITY_MATCH)
            if len(nearby) > 0:
                formatted = formatSuggestions(nearby, "field")
                suggestions.append("%s" % (formatted))
    return suggestions
Exemplo n.º 11
0
def _main():
    if len(sys.argv) > 1:
        search = sys.argv[1]
        cmds = utils.getCommands(search, None)
        comms = [c.strip() for search in cmds for c,a in search ]
        args = [a.strip() for search in cmds for c,a in search ]
        print "Commands:", cmds
        print "Commands: %s  Args: %s" % (comms, args)
    else:
        user = "******"
        sessionKey = utils.TEST_SESSION()
        namespace = utils.TEST_NAMESPACE()
        #print getPastSearches(user, None, sessionKey, namespace)
        bnf = utils.getStanzas("searchbnf", sessionKey, user, namespace)
        data, searches = getNextData(user, bnf, sessionKey, namespace)
        for cmd in data:
            print "\t%s" % cmd
def didYouMeanFields(sessionKey, username, namespace, bnf, search):
    """Suggest corrections for unknown ``name=`` fields, last fields first.

    The lower-cased ``name=`` terms of ``search`` are walked from the end of
    the search towards the start.  Terms that are not stanza names of the
    "fields" configuration are matched with difflib (at most
    MAX_SUGGESTIONS_PER_FIELD matches, cutoff QUALITY_MATCH); each term with
    a match adds one formatted suggestion.  The walk stops after
    MAX_FIELD_SUGGESTIONS_PER_SEARCH suggestions.

    ``bnf`` is unused.  Returns the list of suggestion strings.
    """
    knownFields = utils.getStanzas("fields", sessionKey, username, namespace).stanzas.keys()
    knownFields = [field.lower() for field in knownFields] # lowercase knownfields
    # Membership is tested against a set; difflib still receives the list so
    # its results are exactly what they were.
    knownFieldSet = set(knownFields)
    # preserve the order, do not make a set
    searchFields = re.findall("([a-zA-Z0-9-_]+)=", search.lower())
    # Fields already handled.  Tracking every handled field (not just those
    # that produced a suggestion) avoids re-running difflib for a repeated
    # unknown field that has no close match; the result would be the same.
    handled = set()
    suggestions = []
    # for the last n incorrect fields show suggestions
    for field in reversed(searchFields):
        if len(suggestions) >= MAX_FIELD_SUGGESTIONS_PER_SEARCH:
            break
        # since it is a list, can contain duplicates.
        if field in handled:
            continue
        handled.add(field)

        if field in knownFieldSet:
            continue

        fieldmatches = difflib.get_close_matches(field, knownFields, n=MAX_SUGGESTIONS_PER_FIELD, cutoff=QUALITY_MATCH)
        if fieldmatches:
            suggestions.append("%s" % (formatSuggestions(fieldmatches, "field")))
    return suggestions
Exemplo n.º 13
0
def _main():
    argc = len(sys.argv)
    argv = sys.argv
    sessionKey = utils.TEST_SESSION()
    namespace = utils.TEST_NAMESPACE()
    username = '******'
    if len(argv) < 2:
        usage()
    cmd = argv[1]
    if argc == 3 and cmd != "parse":
        inputtxt = argv[2]
        bnf = getBNF(cmd, sessionKey, username, namespace)
        exp = getExp(bnf)
        next = getNext(exp, inputtxt)
        #regex = getRegex(cmd, sessionKey)

        datatypes = utils.getStanzas("datatypesbnf", sessionKey, username,
                                     namespace)

        print "bnf:\t", bnf
        print "exp:\t", exp
        print "next:\t", next
        print "regex:\t", exp.toRegex(datatypes)
        print "Simpleregex:\t", exp.toSimpleRegex(True)

    elif argc >= 3:
        bnf = argv[2]
        inputtxt = ""
        if argc == 4:
            inputtxt = argv[3]
        exp = getExp(bnf)
        next = getNext(exp, inputtxt)
        #print "exp:", exp
        #print "minMatchLen:", exp.minMatchLen()
        #print "next:", next
        print "%s\t%s" % (exp.minMatchLen(), bnf)
    else:
        usage()
Exemplo n.º 14
0
def _main():
    argc = len(sys.argv)
    argv = sys.argv
    sessionKey = utils.TEST_SESSION()
    namespace  = utils.TEST_NAMESPACE()
    username = '******'
    if len(argv) < 2:
        usage()
    cmd = argv[1]
    if argc == 3 and cmd != "parse":
        inputtxt = argv[2]        
        bnf = getBNF(cmd, sessionKey, username, namespace)
        exp = getExp(bnf)
        next = getNext(exp,inputtxt)
        #regex = getRegex(cmd, sessionKey)

        datatypes = utils.getStanzas("datatypesbnf", sessionKey, username, namespace)
        
        print "bnf:\t", bnf
        print "exp:\t", exp
        print "next:\t", next
        print "regex:\t", exp.toRegex(datatypes)
        print "Simpleregex:\t", exp.toSimpleRegex(True)        
            
    elif argc >= 3:
        bnf = argv[2]
        inputtxt = ""
        if argc == 4:
            inputtxt = argv[3]
        exp = getExp(bnf)
        next = getNext(exp,inputtxt)
        #print "exp:", exp
        #print "minMatchLen:", exp.minMatchLen()
        #print "next:", next
        print "%s\t%s" % (exp.minMatchLen(), bnf)
    else:
        usage()
def doHelp(sessionKey, namespace, user, search, insertpos=None, earliest_time=None, latest_time=None, count=10, max_time=None, servers=None,
         useTypeahead=False, showCommandHelp=True, showCommandHistory=True, showFieldInfo=True):
    """
    Build the search-assistant help dictionary for ``search``.

    Examples of the kinds of help being assembled:
    "did you mean ___?"
    "did you know ___?"
    "the 'sort' operator takes blah arguments and does blah"
    "you might also be interested in ___?"
    "the fields ___ can help narrow down these results"
    "these past searches are similar to your search"
    "these saved searches are similar to your search"
    "you are searching for ip and host and then deduplicating by host"
    "your search would be faster if you ..."

    Parameters:
      sessionKey, namespace, user -- passed through to the helper calls.
      search -- the search text as typed by the user.
      insertpos -- position up to which ``search`` is analysed; None, or a
          value int() cannot convert, means the end of the search.
      earliest_time, latest_time, count, max_time, servers -- forwarded to
          suggestSearchTypeahead only.
      useTypeahead, showCommandHelp, showCommandHistory, showFieldInfo --
          feature toggles, interpreted with splunk.util.normalizeBoolean.

    Returns the ``output`` dict.  An exception raised by any helper inside
    the main sequence is caught and logged; its message and traceback
    become the first entry of ``output['notices']``, and whatever was
    collected before the failure is still returned.
    """

    originalsearch = search
    if insertpos is None: # no insertion point, use end
        insertpos = len(search)
    else:
        try:
            insertpos = int(insertpos)
        except (TypeError, ValueError, OverflowError):
            # Unusable insertion point: fall back to the end of the search.
            insertpos = len(search)

    # Only the text up to the insertion point is analysed, normalised so
    # that it always starts with a pipe.
    search = search[:insertpos].strip()

    if search == "":
        search = "| search"
    elif not search.startswith("|"):
        search = "| " + search

    # The user's query is the untruncated text minus a leading "search ".
    usersquery = originalsearch
    if usersquery.startswith("search "):
        usersquery = usersquery[len("search "):]
    queryprefix = utils.allButLast(usersquery)
    # defaults
    output = { 'notices': [], 'fields': [], 'args': [], 'nexts': [], 'autonexts':[], 'autocomplete':[], 'autocomplete_match':'', 'command':{}, 'typeahead': [],
               'search': usersquery, 'searchprefix': queryprefix, 'allcommands': [], 'savedsearches': [], 'arg_typeahead':[], 'has_field_args':False}
    try:
        bnf = utils.getStanzas("searchbnf", sessionKey, user, namespace)
        output['allcommands'] = utils.getAllCommands(bnf, user, namespace)
        aliasMap = utils.getAliasMap(bnf)

        # Each helper below receives ``output`` and is expected to add its
        # findings to it; the call order is preserved as-is.
        if (splunk.util.normalizeBoolean(useTypeahead)):
            suggestSearchTypeahead(output, search, usersquery, count, max_time, earliest_time, latest_time, servers, namespace, user)

        firstTermShouldBeCommand(output, search, aliasMap)
        didYouMean.help(output, bnf, sessionKey, namespace, user, search, usersquery)
        didYouKnow.help(output, aliasMap, user, search)
        relatedPastSearches(output, user, search)
        relatedSearches(output, sessionKey, namespace, user, search)

        if (splunk.util.normalizeBoolean(showCommandHelp)):
            commandHelp(output, user, search, aliasMap, bnf)

        nextCommand(output, sessionKey, namespace, user, search, usersquery, queryprefix, aliasMap, bnf, splunk.util.normalizeBoolean(showCommandHistory))
        relatedTerms(output, user, search)

        if (splunk.util.normalizeBoolean(showFieldInfo)):
            fieldInfo.usefulFields(output, sessionKey, namespace, user, usersquery)

        describeSearch(output, user, search)
        suggestOptimizations(output, user, search)
        argTypeahead(output, sessionKey, namespace, user, bnf, search)

    except Exception as e:
        # Best effort: report the failure to the user and the log instead of
        # failing the whole request.
        msg = "! Error in search assistant: %s" % e
        msg += traceback.format_exc()
        output['notices'].insert(0,msg)

        logger.error(msg)

    # Hand the collected help back to the caller; without this the dict
    # built above would be discarded.
    return output
def doHelp(sessionKey,
           namespace,
           user,
           search,
           insertpos=None,
           earliest_time=None,
           latest_time=None,
           count=10,
           max_time=None,
           servers=None,
           useTypeahead=False,
           showCommandHelp=True,
           showCommandHistory=True,
           showFieldInfo=True):
    """
    Collect search-assistant help for ``search`` into one dictionary.

    Examples of the kinds of help being assembled:
    "did you mean ___?"
    "did you know ___?"
    "the 'sort' operator takes blah arguments and does blah"
    "you might also be interested in ___?"
    "the fields ___ can help narrow down these results"
    "these past searches are similar to your search"
    "these saved searches are similar to your search"
    "you are searching for ip and host and then deduplicating by host"
    "your search would be faster if you ..."

    Parameters:
      sessionKey, namespace, user -- passed through to the helper calls.
      search -- the search text as typed by the user.
      insertpos -- position up to which ``search`` is analysed; None, or a
          value int() cannot convert, means the end of the search.
      earliest_time, latest_time, count, max_time, servers -- forwarded to
          suggestSearchTypeahead only.
      useTypeahead, showCommandHelp, showCommandHistory, showFieldInfo --
          feature toggles, interpreted with splunk.util.normalizeBoolean.

    Returns the ``output`` dict.  If a helper inside the main sequence
    raises, the error text and traceback are logged and inserted as the
    first entry of ``output['notices']``; the partially filled dict is
    still returned.
    """

    originalsearch = search
    if insertpos is None:  # no insertion point, use end
        insertpos = len(search)
    else:
        try:
            insertpos = int(insertpos)
        except (TypeError, ValueError, OverflowError):
            # Not convertible to an int: treat as "end of search".
            insertpos = len(search)

    # Analyse only the text before the insertion point, and make sure it
    # starts with a pipe.
    search = search[:insertpos].strip()

    if search == "":
        search = "| search"
    elif not search.startswith("|"):
        search = "| " + search

    # usersquery is derived from the full original text, not the truncated
    # one, with a leading "search " removed.
    usersquery = originalsearch
    if usersquery.startswith("search "):
        usersquery = usersquery[len("search "):]
    queryprefix = utils.allButLast(usersquery)
    # defaults
    output = {
        'notices': [],
        'fields': [],
        'args': [],
        'nexts': [],
        'autonexts': [],
        'autocomplete': [],
        'autocomplete_match': '',
        'command': {},
        'typeahead': [],
        'search': usersquery,
        'searchprefix': queryprefix,
        'allcommands': [],
        'savedsearches': [],
        'arg_typeahead': [],
        'has_field_args': False
    }
    try:
        bnf = utils.getStanzas("searchbnf", sessionKey, user, namespace)
        output['allcommands'] = utils.getAllCommands(bnf, user, namespace)
        aliasMap = utils.getAliasMap(bnf)

        # The helpers are given ``output`` to fill in; their order is kept
        # exactly as it was.
        if (splunk.util.normalizeBoolean(useTypeahead)):
            suggestSearchTypeahead(output, search, usersquery, count, max_time,
                                   earliest_time, latest_time, servers,
                                   namespace, user)

        firstTermShouldBeCommand(output, search, aliasMap)
        didYouMean.help(output, bnf, sessionKey, namespace, user, search,
                        usersquery)
        didYouKnow.help(output, aliasMap, user, search)
        relatedPastSearches(output, user, search)
        relatedSearches(output, sessionKey, namespace, user, search)

        if (splunk.util.normalizeBoolean(showCommandHelp)):
            commandHelp(output, user, search, aliasMap, bnf)

        nextCommand(output, sessionKey, namespace, user, search, usersquery,
                    queryprefix, aliasMap, bnf,
                    splunk.util.normalizeBoolean(showCommandHistory))
        relatedTerms(output, user, search)

        if (splunk.util.normalizeBoolean(showFieldInfo)):
            fieldInfo.usefulFields(output, sessionKey, namespace, user,
                                   usersquery)

        describeSearch(output, user, search)
        suggestOptimizations(output, user, search)
        argTypeahead(output, sessionKey, namespace, user, bnf, search)

    except Exception as e:
        # Best effort: surface the failure as a notice and in the log rather
        # than propagating it.
        msg = "! Error in search assistant: %s" % e
        msg += traceback.format_exc()
        output['notices'].insert(0, msg)

        logger.error(msg)

    # Return the assembled help; otherwise everything gathered above is
    # lost to the caller.
    return output