def main(): """ main entry point for script """ opts = getoptions() logging.basicConfig(format='%(asctime)s [%(levelname)s] %(message)s', datefmt='%Y-%m-%dT%H:%M:%S', level=opts['log']) if sys.version.startswith("2.7"): logging.captureWarnings(True) config = Config(opts['config']) dwconfig = config.getsection("datawarehouse") dbif = DbHelper(dwconfig, 'modw_supremm.batchscripts') for resourcename, settings in config.resourceconfigs(): if opts['resource'] in (None, resourcename, str(settings['resource_id'])): logging.debug("Processing %s (id=%s)", resourcename, settings['resource_id']) if "script_dir" in settings: total = processfor(settings['resource_id'], settings['script_dir'], dbif, opts['deltadays']) logging.info("Processed %s files for %s", total, resourcename) else: logging.debug("Skip resource %s no script dir defined", resourcename) dbif.postinsert()
def default_settings(confpath):
    """ populate the default settings for the configuration. will use the
        values specified in the configuration file if it exists. """

    defaults = {}
    defaults['usexdmodconfig'] = 'y'
    defaults['xdmodpath'] = '/etc/xdmod'
    defaults['archiveoutdir'] = '/dev/shm/supremm'
    defaults['mysqlhostname'] = 'localhost'
    defaults['mysqlport'] = 3306
    defaults['mysqlusername'] = '******'
    defaults['mycnffilename'] = '~/.supremm.my.cnf'
    defaults['mongouri'] = 'mongodb://localhost:27017/supremm'
    defaults['mongodb'] = 'supremm'

    try:
        existingconf = Config(confpath)
        rawconfig = existingconf._config.copy()

        if 'xdmodroot' in rawconfig and 'datawarehouse' in rawconfig and 'include' in rawconfig['datawarehouse']:
            defaults['usexdmodconfig'] = 'y'
            defaults['xdmodpath'] = rawconfig['xdmodroot']
        else:
            dwconfig = existingconf.getsection('datawarehouse')
            defaults['usexdmodconfig'] = 'n'
            defaults['mysqlhostname'] = dwconfig['host']
            defaults['mysqlport'] = dwconfig.get('port', defaults['mysqlport'])
            defaults['mycnffilename'] = dwconfig['defaultsfile']

            try:
                mycnf = ConfigParser.RawConfigParser()
                mycnf.read(os.path.expanduser(dwconfig['defaultsfile']))
                if mycnf.has_section('client'):
                    defaults['mysqlusername'] = mycnf.get('client', 'user')
            except ConfigParser.Error:
                pass

        outputconfig = existingconf.getsection('outputdatabase')
        defaults['mongouri'] = outputconfig['uri']
        defaults['mongodb'] = outputconfig['dbname']

        summarycnf = existingconf.getsection('summary')
        defaults['archiveoutdir'] = summarycnf['archive_out_dir']

        defaults['resources'] = rawconfig['resources']
    except Exception:
        # ignore missing or broken existing config files and fall back to the defaults above
        pass

    return defaults
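# Illustration only (not part of the source): default_settings() above reads the
# MySQL defaults file named by the "datawarehouse" section's 'defaultsfile' with
# ConfigParser.RawConfigParser and takes the username from the standard my.cnf
# "client" section. The values below are hypothetical; only the section and
# option names are implied by the code:
#
#   [client]
#   user = supremm
#   password = <password>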
def runingest():
    """ ingest script entry point """
    if len(sys.argv) > 1:
        config = Config(sys.argv[1])
    else:
        config = Config()

    if len(sys.argv) > 2:
        end_time = sys.argv[2]
    else:
        end_time = 9223372036854775807L

    # end_time is only used by the (currently disabled) single-resource ingest call
    #ingest(config, end_time)
    ingestall(config)
def runindexing(): """ main script entry point """ opts = getoptions() keep_csv = opts["keep_csv"] setuplogger(opts['log'], opts['debugfile'], filelevel=logging.INFO) config = Config(opts['config']) logging.info("archive indexer starting") pool = None if opts['num_threads'] > 1: logging.debug("Using %s processes", opts['num_threads']) pool = Pool(opts['num_threads']) for resourcename, resource in config.resourceconfigs(): if opts['resource'] in (None, resourcename, str(resource['resource_id'])): if not resource.get('pcp_log_dir'): continue acache = PcpArchiveProcessor(resource) afind = PcpArchiveFinder(opts['mindate'], opts['maxdate'], opts['all']) if pool is not None: index_resource_multiprocessing(config, resource, acache, afind, pool, keep_csv) else: fast_index_allowed = bool(resource.get("fast_index", False)) with LoadFileIndexUpdater(config, resource, keep_csv) as index: for archivefile, fast_index, hostname in afind.find( resource['pcp_log_dir']): start_time = time.time() data = acache.processarchive( archivefile, fast_index and fast_index_allowed, hostname) parse_end = time.time() if data is not None: index.insert(*data) db_end = time.time() logging.debug( "processed archive %s (fileio %s, dbacins %s)", archivefile, parse_end - start_time, db_end - parse_end) logging.info("archive indexer complete") if pool is not None: pool.close() pool.join()
def main(): """ print out config data according to cmdline args """ opts = getoptions() conf = Config() try: section = conf.getsection(opts['section']) if opts['item'] != None: print section[opts['item']] else: print json.dumps(section, indent=4) except KeyError: sys.stderr.write("Error section \"%s\" not defined in configuration file.\n" % (opts['section'])) sys.exit(1)
def main(): """ main entry point for script """ opts = getoptions() logging.basicConfig(format='%(asctime)s [%(levelname)s] %(message)s', datefmt='%Y-%m-%dT%H:%M:%S', level=opts['log']) if sys.version.startswith("2.7"): logging.captureWarnings(True) config = Config() threads = opts['threads'] if threads <= 1: processjobs(config, opts, None) return else: proclist = [] for procid in xrange(threads): p = Process(target=processjobs, args=(config, opts, procid)) p.start() proclist.append(p) for proc in proclist: p.join()
def test():
    """ simple test function """
    config = Config()
    xdm = XDMoDAcct(13, config, None, None)
    for job in xdm.get(1444151688, None):
        print job
def main(): """ main entry point for script """ comm = MPI.COMM_WORLD opts = getoptions(True) opts['threads'] = comm.Get_size() logout = "mpiOutput-{}.log".format(comm.Get_rank()) # For MPI jobs, do something sane with logging. setuplogger(logging.ERROR, logout, opts['log']) config = Config() if comm.Get_size() < 2: logging.error("Must run MPI job with at least 2 processes") sys.exit(1) myhost = MPI.Get_processor_name() logging.info("Nodename: %s", myhost) processjobs(config, opts, comm.Get_rank(), comm) logging.info("Rank: %s FINISHED", comm.Get_rank())
def updateMysqlTables(display, opts):
    """ Interactive mysql script execution """
    config = Config()
    dbsettings = config.getsection("datawarehouse")

    checkForPreviousInstall(display, dbsettings)

    migration = pkg_resources.resource_filename(__name__, "migrations/1.0-1.1/modw_supremm.sql")

    host = dbsettings['host']
    port = dbsettings['port'] if 'port' in dbsettings else 3306

    display.newpage("MySQL Database setup")
    myrootuser = display.prompt_string("DB Admin Username", "root")
    myrootpass = display.prompt_password("DB Admin Password")

    pflag = "-p{0}".format(myrootpass) if myrootpass != "" else ""
    shellcmd = "mysql -u {0} {1} -h {2} -P {3} < {4}".format(myrootuser, pflag, host, port, migration)
    try:
        if opts.debug:
            display.print_text(shellcmd)

        retval = subprocess.call(shellcmd, shell=True)
        if retval != 0:
            display.print_warning("""
An error occurred migrating the tables. Please create the tables manually
following the documentation in the install guide.
""")
        else:
            display.print_text("Successfully migrated tables")

    except OSError as e:
        display.print_warning("""
An error: \"{0}\" occurred running the mysql command. Please create the tables
manually following the documentation in the install guide.
""".format(e.strerror))

    display.hitanykey("Press ENTER to continue.")
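# Illustration only (not part of the source): with the prompts above answered
# with their defaults, the shell command built by updateMysqlTables() is of the
# form (host, password and install path depend on the local configuration):
#
#   mysql -u root -p<password> -h <host> -P 3306 < .../migrations/1.0-1.1/modw_supremm.sql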
def main(): """ print out config data according to cmdline args """ opts = getoptions() conf = Config() try: section = conf.getsection(opts['section']) if opts['item'] != None: print section[opts['item']] else: print json.dumps(section, indent=4) except KeyError: sys.stderr.write( "Error section \"%s\" not defined in configuration file.\n" % (opts['section'])) sys.exit(1)
def runindexing():
    """ main script entry point """
    opts = getoptions()

    logging.basicConfig(format='%(asctime)s [%(levelname)s] %(message)s',
                        datefmt='%Y-%m-%dT%H:%M:%S', level=opts['log'])
    logging.captureWarnings(True)

    config = Config(opts['config'])

    for resourcename, resource in config.resourceconfigs():
        if opts['resource'] in (None, resourcename, str(resource['resource_id'])):
            acache = PcpArchiveProcessor(config, resource)
            afind = PcpArchiveFinder(opts['mindate'])
            for archivefile in afind.find(resource['pcp_log_dir']):
                acache.processarchive(archivefile)
            acache.close()
def create_config(display):
    """ Create the configuration file """
    display.newpage("Configuration File setup (DB)")
    confpath = getvalidpath(display, "Enter path to configuration files", Config.autodetectconfpath())
    defaults = default_settings(confpath)

    display.newpage()
    display.print_text("""XDMoD datawarehouse access credentials.

There are two options to specify the XDMoD datawarehouse access credentials.
Either specify the path to the XDMoD install or specify the hostname, username
and password of the database directly.
""")
    doxdmod = display.prompt("Do you wish to specify the XDMoD install directory", ["y", "n"], defaults['usexdmodconfig'])

    mycnf = None
    outconfig = {}

    if doxdmod == "y":
        outconfig = getxdmodsettings(display, defaults)
    else:
        outconfig, mycnf = getdirectsettings(display, defaults)

    archivedir = display.prompt_string("Temporary directory to use for job archive processing", defaults['archiveoutdir'])
    outconfig["summary"] = {"archive_out_dir": archivedir, "subdir_out_format": "%r/%j"}

    display.newpage("Configuration File setup (Resources)")
    display.print_text("Autodetecting resources based on configuration file settings")

    outconfig["resources"] = {}

    try:
        config = generatetempconfig(outconfig, mycnf)
        dbconn = getdbconnection(config.getsection("datawarehouse"))
        dbcur = dbconn.cursor()
        dbcur.execute("SELECT id AS resource_id, code AS resource FROM modw.resourcefact")
        for resource in dbcur:
            resconf = configure_resource(display, resource[0], resource[1], defaults)
            outconfig['resources'][resource[1]] = resconf

        writeconfig(display, confpath, outconfig, mycnf)
    except Exception as exc:
        display.print_warning("An error occurred while detecting resources.\n{0}".format(exc))
        display.hitanykey("Press ENTER to continue.")
def main(): """ main entry point for script """ opts = getoptions() logging.basicConfig(format='%(asctime)s [%(levelname)s] %(message)s', datefmt='%Y-%m-%dT%H:%M:%S', level=opts['log']) if sys.version.startswith("2.7"): logging.captureWarnings(True) config = Config(opts['config']) dwconfig = config.getsection("datawarehouse") for resourcename, settings in config.resourceconfigs(): if opts['resource'] in (None, resourcename, str(settings['resource_id'])): logging.debug("Processing %s (id=%s)", resourcename, settings['resource_id']) respath, timestamp_mode = parse_resource_config(settings) if respath: dbif = DbHelper(dwconfig, "modw_supremm", timestamp_mode) total = processfor(settings['resource_id'], respath, dbif, opts['deltadays']) dbif.postinsert() logging.info("Processed %s files for %s", total, resourcename) else: logging.debug("Skip resource %s no script dir defined", resourcename)
def runindexing():
    """ main script entry point """
    opts = getoptions()
    setuplogger(opts['log'], opts['debugfile'], logging.DEBUG)

    config = Config(opts['config'])

    logging.info("archive indexer starting")

    for resourcename, resource in config.resourceconfigs():
        if opts['resource'] in (None, resourcename, str(resource['resource_id'])):
            acache = PcpArchiveProcessor(config, resource)
            afind = PcpArchiveFinder(opts['mindate'], opts['maxdate'])
            for archivefile in afind.find(resource['pcp_log_dir']):
                acache.processarchive(archivefile)
            acache.close()

    logging.info("archive indexer complete")
def generatetempconfig(confdata, mycnf):
    """ Generate a configuration object based on the config settings """
    confpath = tempfile.mkdtemp()

    tmpconfdata = copy.deepcopy(confdata)

    if mycnf is not None:
        mycnfpath = os.path.join(confpath, "my.cnf")
        tmpconfdata['datawarehouse']['defaultsfile'] = mycnfpath
        with open(mycnfpath, "w") as tmpmycnf:
            tmpmycnf.write(mycnf)

    with open(os.path.join(confpath, "config.json"), "w") as tmpconfig:
        json.dump(tmpconfdata, tmpconfig, indent=4)

    config = Config(confpath)

    return config
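# Illustration only (not part of the source): generatetempconfig() dumps the
# settings collected by create_config() to <tempdir>/config.json. Based on the
# keys used elsewhere in this file, the file has roughly this shape; all values
# shown are hypothetical.
#
#   {
#       "datawarehouse": {"host": "localhost", "port": 3306, "defaultsfile": "<tempdir>/my.cnf"},
#       "outputdatabase": {"uri": "mongodb://localhost:27017/supremm", "dbname": "supremm"},
#       "summary": {"archive_out_dir": "/dev/shm/supremm", "subdir_out_format": "%r/%j"},
#       "resources": {"<resource code>": {"resource_id": 1}}
#   }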
def main(): """ main entry point for script """ opts = getoptions(False) setuplogger(opts['log']) config = Config() threads = opts['threads'] process_pool = mp.Pool(threads) if threads > 1 else None processjobs(config, opts, process_pool) if process_pool is not None: # wait for all processes to finish process_pool.close() process_pool.join()
def promptconfig(display):
    """ prompt user for configuration path """
    config = None
    while config is None:
        confpath = display.prompt_string("Enter path to configuration files", Config.autodetectconfpath())
        try:
            config = Config(confpath)
            config.getsection('datawarehouse')
            config.getsection('outputdatabase')
        except Exception:
            errmsg = """
Error unable to read valid configuration file in directory {0}
Please enter a valid config path (or ctrl-c to exit this program)
"""
            display.newpage()
            display.print_text(errmsg.format(confpath))
            config = None

    return config
def main(): """ main entry point for script """ opts, args = getoptions() logging.basicConfig(format='%(asctime)s [%(levelname)s] %(message)s', datefmt='%Y-%m-%dT%H:%M:%S', level=opts['log']) if sys.version.startswith("2.7"): logging.captureWarnings(True) preprocs = loadpreprocessors() plugins = loadplugins() if len(opts['plugin_whitelist']) > 0: preprocs, plugins = filter_plugins( {"plugin_whitelist": opts['plugin_whitelist']}, preprocs, plugins) elif len(opts['plugin_blacklist']) > 0: preprocs, plugins = filter_plugins( {"plugin_blacklist": opts['plugin_blacklist']}, preprocs, plugins) logging.debug("Loaded %s preprocessors", len(preprocs)) logging.debug("Loaded %s plugins", len(plugins)) archivelist = args job = MockJob(archivelist) config = Config(confpath=opts['config']) preprocessors = [x(job) for x in preprocs] analytics = [x(job) for x in plugins] s = Summarize(preprocessors, analytics, job, config) s.process() result = s.get() print json.dumps(result, indent=4)
def main(): """ main entry point for script """ opts = getoptions(False) setuplogger(opts['log']) config = Config() threads = opts['threads'] if threads <= 1: processjobs(config, opts, None) return else: proclist = [] for procid in xrange(threads): p = Process(target=processjobs, args=(config, opts, procid)) p.start() proclist.append(p) for proc in proclist: p.join()