def main(): """ NOTE: main also serves as entry point for "console script" generated by setup """ logging_support.LoggingSupport().initTool() try: options = _parseArgs() allSymbols = set(stockData["symbol"] for stockData in metric_utils.getMetricsConfiguration().itervalues() ) g_log.info("Verifying that agents are in hot_standby mode") for section in config.sections(): assert config.get(section, "opmode") == config.OP_MODE_HOT_STANDBY g_log.info("Verifying that the old symbol has been removed from the " "metrics configuration") assert options.oldSymbol not in allSymbols g_log.info("Verifying that the new symbol is present in the metrics " "configuration") assert options.newSymbol in allSymbols g_log.info("Migrating BOTH twitter and xignite stock data from " "old-symbol=%s to new-symbol=%s", options.oldSymbol, options.newSymbol) # Rename the metrics in collectorsdb and forward new metric samples to HTM # Engine g_log.info("Modifying old metrics with new symbol") _resymbolTweetVolumeMetric(oldSymbol=options.oldSymbol, newSymbol=options.newSymbol, aggPeriod=options.aggPeriod) _resymbolStockMetrics(oldSymbol=options.oldSymbol, newSymbol=options.newSymbol) # Delete metrics linked to old stock symbol from Taurus Engine delete_companies.deleteCompanies( tickerSymbols=[options.oldSymbol], engineServer=options.htmServer, engineApiKey=options.apikey, warnAboutDestructiveAction=False) except SystemExit as e: if e.code != 0: g_log.exception("Failed!") raise except Exception: g_log.exception("Failed!") raise
def main(): """ NOTE: main also serves as entry point for "console script" generated by setup """ logging_support.LoggingSupport().initTool() try: options = _parseArgs() allSymbols = set(stockData["symbol"] for stockData in metric_utils.getMetricsConfiguration().itervalues()) g_log.info("Verifying that agents are in hot_standby mode") for section in config.sections(): assert config.get(section, "opmode") == config.OP_MODE_HOT_STANDBY g_log.info("Verifying that the old symbol has been removed from the " "metrics configuration") assert options.oldSymbol not in allSymbols g_log.info("Verifying that the new symbol is present in the metrics " "configuration") assert options.newSymbol in allSymbols g_log.info( "Migrating BOTH twitter and xignite stock data from " "old-symbol=%s to new-symbol=%s", options.oldSymbol, options.newSymbol) # Rename the metrics in collectorsdb and forward new metric samples to HTM # Engine g_log.info("Modifying old metrics with new symbol") _resymbolTweetVolumeMetric(oldSymbol=options.oldSymbol, newSymbol=options.newSymbol, aggPeriod=options.aggPeriod) _resymbolStockMetrics(oldSymbol=options.oldSymbol, newSymbol=options.newSymbol) # Delete metrics linked to old stock symbol from Taurus Engine delete_companies.deleteCompanies(tickerSymbols=[options.oldSymbol], engineServer=options.htmServer, engineApiKey=options.apikey, warnAboutDestructiveAction=False) except SystemExit as e: if e.code != 0: g_log.exception("Failed!") raise except Exception: g_log.exception("Failed!") raise
def main(): """ NOTE: main also serves as entry point for "console script" generated by setup """ logging_support.LoggingSupport.initService() options = _parseArgs() # See OP_MODE_ACTIVE, etc. in ApplicationConfig opMode = config.get("xignite_security_news_agent", "opmode") g_log.info("Starting: opMode=%s", opMode) aggSec = options.aggIntervalSec # Load metric specs from metric configuration metricSpecs = _loadNewsVolumeMetricSpecs() # Load securities from metric configuration securities = getAllMetricSecurities() g_log.info("Collecting headlines and releases for %s", securities) # Maps security symbols to the datetime.date of most recently-stored headlines lastSecurityHeadlineEndDates = _querySecurityNewsEndDates( schema.xigniteSecurityHeadline) # Map security symbols to the datetime.date of most recently-stored releases lastSecurityReleaseEndDates = _querySecurityNewsEndDates( schema.xigniteSecurityRelease) # Establish/retrieve datetime of last successfully-emitted metric data batch lastEmittedAggTime = metric_utils.establishLastEmittedSampleDatetime( key=_EMITTED_NEWS_VOLUME_SAMPLE_TRACKER_KEY, aggSec=aggSec) # Calculate next aggregation start time using lastEmittedAggTime as base lastAggStart = date_time_utils.epochFromNaiveUTCDatetime(lastEmittedAggTime) nextAggEnd= lastAggStart + ( int((time.time() - lastAggStart + aggSec - 1) / aggSec) * aggSec) + aggSec # Poll, store and emit samples pollingIntervalSec = aggSec / 2.0 numPoolWorkers = max(_MIN_POOL_CONCURRENCY, multiprocessing.cpu_count()) g_log.info("Entering main loop: pollingIntervalSec=%s; numPoolWorkers=%d", pollingIntervalSec, numPoolWorkers) pool = multiprocessing.Pool(processes=numPoolWorkers) try: while True: pollingIntervalEnd = time.time() + pollingIntervalSec # Retrieve all headlines and releases of interest headlineTasks = _generateTasks( securities, lastSecurityHeadlineEndDates, options.backfillDays, taskClass=_HistoricalHeadlinesTask, dryRun=options.dryRun) releaseTasks = _generateTasks( securities, lastSecurityReleaseEndDates, options.backfillDays, taskClass=_HistoricalReleasesTask, dryRun=options.dryRun) allTasks = itertools.chain(headlineTasks, releaseTasks) _processNewsCollectionTasks(pool=pool, tasksIter=allTasks, headlineEndDates=lastSecurityHeadlineEndDates, releaseEndDates=lastSecurityReleaseEndDates, options=options) # Aggregate and forward metric samples to htmengine's Metric Listener if time.time() >= nextAggEnd: if opMode == config.OP_MODE_ACTIVE and not options.dryRun: lastEmittedAggTime = _forwardNewsVolumeMetrics( metricSpecs=metricSpecs, lastEmittedAggTime=lastEmittedAggTime, stopDatetime=datetime.utcfromtimestamp(nextAggEnd), periodSec=aggSec, metricDestAddr=options.metricDestAddr) nextAggEnd += aggSec sleepSec = pollingIntervalEnd - time.time() if sleepSec > 0: g_log.info("Sleeping for %f seconds. zzzzzzzz...", sleepSec) time.sleep(sleepSec) elif sleepSec < 0: g_log.warning("Processing exceeded pollingInterval=%ss by overage=%ss", pollingIntervalSec, -sleepSec) except KeyboardInterrupt: # Log with exception info to help debug deadlocks g_log.info("Observed KeyboardInterrupt", exc_info=True) pass finally: g_log.info("Closing multiprocessing.Pool") pool.close() g_log.info("Terminating multiprocessing.Pool") pool.terminate() g_log.info("Multiprocessing.Pool terminated")
def main(): """ NOTE: main also serves as entry point for "console script" generated by setup """ logging_support.LoggingSupport.initService() options = _parseArgs() # See OP_MODE_ACTIVE, etc. in ApplicationConfig opMode = config.get("xignite_security_news_agent", "opmode") g_log.info("Starting: opMode=%s", opMode) aggSec = options.aggIntervalSec # Load metric specs from metric configuration metricSpecs = _loadNewsVolumeMetricSpecs() # Load securities from metric configuration securities = getAllMetricSecurities() g_log.info("Collecting headlines and releases for %s", securities) # Maps security symbols to the datetime.date of most recently-stored headlines lastSecurityHeadlineEndDates = _querySecurityNewsEndDates( schema.xigniteSecurityHeadline) # Map security symbols to the datetime.date of most recently-stored releases lastSecurityReleaseEndDates = _querySecurityNewsEndDates( schema.xigniteSecurityRelease) # Establish/retrieve datetime of last successfully-emitted metric data batch lastEmittedAggTime = metric_utils.establishLastEmittedSampleDatetime( key=_EMITTED_NEWS_VOLUME_SAMPLE_TRACKER_KEY, aggSec=aggSec) # Calculate next aggregation start time using lastEmittedAggTime as base lastAggStart = date_time_utils.epochFromNaiveUTCDatetime( lastEmittedAggTime) nextAggEnd = lastAggStart + (int( (time.time() - lastAggStart + aggSec - 1) / aggSec) * aggSec) + aggSec # Poll, store and emit samples pollingIntervalSec = aggSec / 2.0 numPoolWorkers = max(_MIN_POOL_CONCURRENCY, multiprocessing.cpu_count()) g_log.info("Entering main loop: pollingIntervalSec=%s; numPoolWorkers=%d", pollingIntervalSec, numPoolWorkers) pool = multiprocessing.Pool(processes=numPoolWorkers) try: while True: pollingIntervalEnd = time.time() + pollingIntervalSec # Retrieve all headlines and releases of interest headlineTasks = _generateTasks(securities, lastSecurityHeadlineEndDates, options.backfillDays, taskClass=_HistoricalHeadlinesTask, dryRun=options.dryRun) releaseTasks = _generateTasks(securities, lastSecurityReleaseEndDates, options.backfillDays, taskClass=_HistoricalReleasesTask, dryRun=options.dryRun) allTasks = itertools.chain(headlineTasks, releaseTasks) _processNewsCollectionTasks( pool=pool, tasksIter=allTasks, headlineEndDates=lastSecurityHeadlineEndDates, releaseEndDates=lastSecurityReleaseEndDates, options=options) # Aggregate and forward metric samples to htmengine's Metric Listener if time.time() >= nextAggEnd: if opMode == config.OP_MODE_ACTIVE and not options.dryRun: lastEmittedAggTime = _forwardNewsVolumeMetrics( metricSpecs=metricSpecs, lastEmittedAggTime=lastEmittedAggTime, stopDatetime=datetime.utcfromtimestamp(nextAggEnd), periodSec=aggSec, metricDestAddr=options.metricDestAddr) nextAggEnd += aggSec sleepSec = pollingIntervalEnd - time.time() if sleepSec > 0: g_log.info("Sleeping for %f seconds. zzzzzzzz...", sleepSec) time.sleep(sleepSec) elif sleepSec < 0: g_log.warning( "Processing exceeded pollingInterval=%ss by overage=%ss", pollingIntervalSec, -sleepSec) except KeyboardInterrupt: # Log with exception info to help debug deadlocks g_log.info("Observed KeyboardInterrupt", exc_info=True) pass finally: g_log.info("Closing multiprocessing.Pool") pool.close() g_log.info("Terminating multiprocessing.Pool") pool.terminate() g_log.info("Multiprocessing.Pool terminated")
def main(): """ NOTE: main also serves as entry point for "console script" generated by setup """ logging_support.LoggingSupport().initTool() try: options = _parseArgs() g_log.info("Verifying that agents are in hot_standby mode") for section in config.sections(): try: assert config.get(section, "opmode") == ApplicationConfig.OP_MODE_HOT_STANDBY except Exception, e: raise g_log.info("Verifying that the old symbol has been removed from the " "metrics configuration") for stockData in metric_utils.getMetricsConfiguration().itervalues(): assert stockData["symbol"] != options.old_symbol if options.twitter and (not options.stocks): g_log.info( "Migrating ONLY twitter data from old-symbol=%s " "to new-symbol=%s", options.old_symbol, options.new_symbol, ) elif options.stocks and (not options.twitter): g_log.info( "Migrating ONLY xignite stock data from old-symbol=%s " "to new-symbol=%s", options.old_symbol, options.new_symbol, ) raise NotImplementedError else: g_log.info( "Migrating BOTH twitter and xignite stock data from " "old-symbol=%s to new-symbol=%s", options.old_symbol, options.new_symbol, ) raise NotImplementedError oldSymbolTweetPrefix = "TWITTER.TWEET.HANDLE.{symbol}.".format(symbol=options.old_symbol) newSymbolTweetPrefix = "TWITTER.TWEET.HANDLE.{symbol}.".format(symbol=options.new_symbol) oldSymbolTweetMetricsList = [] with collectorsdb.engineFactory().begin() as conn: g_log.info("Renaming metrics to new symbol") if options.twitter: oldSymbolTweetsQuery = sql.select([tweetSamplesSchema]).where( tweetSamplesSchema.c.metric.contains(oldSymbolTweetPrefix) ) oldSymbolTweets = conn.execute(oldSymbolTweetsQuery) for tweetSample in oldSymbolTweets: newMetricName = "{newPrefix}{metric}".format( newPrefix=newSymbolTweetPrefix, metric=tweetSample.metric[len(oldSymbolTweetPrefix) :] ) if tweetSample.metric not in oldSymbolTweetMetricsList: oldSymbolTweetMetricsList.append(tweetSample.metric) updateSampleQuery = ( tweetSamplesSchema.update() .where(tweetSamplesSchema.c.seq == tweetSample.seq) .values(metric=newMetricName) ) conn.execute(updateSampleQuery) g_log.info("Forwarding new twitter metric data to Taurus engine...") if options.twitter: oldestRecordTs = conn.execute( sql.select([tweetSamplesSchema.c.agg_ts], order_by=tweetSamplesSchema.c.agg_ts.asc()) ).first()[0] lastEmittedAggTime = metric_utils.establishLastEmittedSampleDatetime( key=_EMITTED_TWEET_VOLUME_SAMPLE_TRACKER_KEY, aggSec=options.aggPeriod ) aggOffset = ( math.ceil( (epochFromNaiveUTCDatetime(lastEmittedAggTime) - epochFromNaiveUTCDatetime(oldestRecordTs)) / options.aggPeriod ) * options.aggPeriod ) aggStartDatetime = ( lastEmittedAggTime - timedelta(seconds=aggOffset) - timedelta(seconds=options.aggPeriod) ) metric_utils.updateLastEmittedSampleDatetime( key=_EMITTED_TWEET_VOLUME_SAMPLE_TRACKER_KEY, sampleDatetime=aggStartDatetime ) MetricDataForwarder.runInThread( metricSpecs=loadMetricSpecs(), aggSec=options.aggPeriod, symbolList=[options.new_symbol], forwardOnlyBacklog=True, ) metric_utils.updateLastEmittedSampleDatetime( key=_EMITTED_TWEET_VOLUME_SAMPLE_TRACKER_KEY, sampleDatetime=lastEmittedAggTime ) g_log.info("Forwarding metrics to dynamodb using new symbol...") if options.twitter: migrate_tweets_to_dynamodb.main(symbolList=[options.new_symbol]) g_log.info("Unmonitoring and deleting existing metrics associated with " "symbol=%s", options.old_symbol) oldModels = metric_utils.getSymbolModels(options.htmServer, options.apikey, options.old_symbol) for model in oldModels: metric_utils.unmonitorMetric(options.htmServer, 
options.apikey, model.uid) metric_utils.deleteMetric(options.htmServer, options.apikey, model.name)
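
# The aggOffset arithmetic above rewinds the "last emitted" tracker to one
# aggregation period before the oldest renamed tweet sample, so that the
# forwarder (invoked with forwardOnlyBacklog=True) re-emits the entire backlog
# under the new symbol before the tracker is restored. A worked example with
# hypothetical timestamps:
def _aggRewindExample():
  import math
  from datetime import datetime, timedelta

  aggPeriod = 300                                      # seconds
  oldestRecordTs = datetime(2015, 6, 1, 0, 1, 40)      # hypothetical
  lastEmittedAggTime = datetime(2015, 6, 1, 1, 0, 0)   # hypothetical

  # 3500s elapsed, rounded up to a whole number of aggregation periods
  elapsed = (lastEmittedAggTime - oldestRecordTs).total_seconds()
  aggOffset = math.ceil(elapsed / aggPeriod) * aggPeriod   # 3600s

  # One extra period guarantees the interval containing the oldest record is
  # covered
  aggStartDatetime = (lastEmittedAggTime -
                      timedelta(seconds=aggOffset) -
                      timedelta(seconds=aggPeriod))
  assert aggStartDatetime == datetime(2015, 5, 31, 23, 55, 0)
  return aggStartDatetime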