dwfuncts.BATCHSIZE = config.getint('dw', 'batchsize') #size of each bulk indexing operation (no.profiles) dwfuncts.SINGLEINDEXLIMIT = config.getint('dw', 'singleindexlimit') #number of profiles before bulk indexing takes over from single dwfuncts.INDEX = config.get('dw', 'index') dwfuncts.TYPE = config.get('dw', 'type') dwfuncts.ADDRESS = config.get('dw', 'address') #address of elastic search server dwfuncts.PORT = config.get('dw', 'port') server = pyes.es.ES(server="%s:%s" % (dwfuncts.ADDRESS, dwfuncts.PORT)) #initialises the conection to elastic search if args.debug: logger.setLevel(logging.DEBUG) if not(args.noindex): if not os.path.isdir('../Profiles/.git'): logger.debug("qdw: Initialising index and populating git repo") dwfuncts.indexinstall(logger, server) #inits index and populates else: logger.debug("qdw: Existing index found, updating") dwfuncts.updater(logger, server) #updates the index if (args.distributionofx is not(None)): logger.debug("qdw: Requested distribution") path = args.distributionofx[0].strip('/').replace("/",".") logger.debug("qdw: path set to %s" % (path)) count = dwfuncts.indexcount(logger) logger.debug("qdw: count returned as %s" % (count)) jsonquery={ "fields":[""], #dont return any of the source "size": count, #gets number of files in the profiles dir and set as the max no. results to return "query" : { "match_all" : {}
import argparse import json import os import dwfuncts parser = argparse.ArgumentParser(description="Search for data from within the server profiles. The program treats the profiles as file systems therefore the path argument should reflect this. For example when producing a distribution of 'driver' the 'path' would be hardware/cards/nic/eth0/driver.") group = parser.add_mutually_exclusive_group() group.add_argument('-d','--distributionofx',nargs=1,metavar='path', type=str, help="Produces a frequency distribution of all the values within 'path' (as json formatted string if --prettyprint not used)") group.add_argument('-f','--frequencyofx', nargs=2, metavar=('path', 'value'), help="Produces a list of machines, all of which have 'value' within 'path' (as json formatted string if --prettyprint not used)") parser.add_argument('-p','--prettyprint', action='store_true',help='Makes general outputs readable on the command line, not recommended for use as part of API') parser.add_argument('-n','--noindex', action='store_true', default=False, help='Forces the program not to index any profiles, this will increase speed but may not yeild accurate results. (NB.is neccesary if profiles are not stored locally)') args = parser.parse_args() if not(args.noindex): if not os.path.isdir('Profiles/.git'): dwfuncts.indexinstall() #inits index and populates else: dwfuncts.updater() #updates the index if (args.distributionofx is not(None)): path = args.distributionofx[0].strip('/').replace("/",".") count = dwfuncts.indexcount() jsonquery={ "fields":[""], #dont return any of the source "size":count, #gets number of files in the profiles dir and set as the max no. results to return "query" : { "match_all" : {} }, "facets" : { "tag" : { "terms" : {