def main():
  """Re-symbol collected stock data from an old ticker symbol to a new one.

  NOTE: main also serves as entry point for "console script" generated by setup

  Sequence of operations:
    1. Check that every config section reports the hot-standby opmode.
    2. Check that the old symbol is absent from, and the new symbol present
       in, the metrics configuration.
    3. Re-symbol the tweet volume metric and the stock metrics.
    4. Delete the old symbol via delete_companies.deleteCompanies.

  The precondition checks are explicit ``raise`` statements rather than
  ``assert`` so that they are still enforced when Python runs with ``-O``;
  the destructive steps that follow must never run on unchecked state.

  :raises AssertionError: if any precondition check fails (same exception
    type the original ``assert`` statements produced)
  :raises SystemExit: propagated unchanged; logged only when the exit code is
    non-zero
  :raises Exception: any other failure is logged and re-raised
  """
  logging_support.LoggingSupport().initTool()

  try:
    options = _parseArgs()
    allSymbols = set(stockData["symbol"] for stockData in
                     metric_utils.getMetricsConfiguration().itervalues())

    g_log.info("Verifying that agents are in hot_standby mode")
    for section in config.sections():
      sectionOpMode = config.get(section, "opmode")
      if sectionOpMode != config.OP_MODE_HOT_STANDBY:
        raise AssertionError(
          "Config section %r has opmode=%r; expected %r"
          % (section, sectionOpMode, config.OP_MODE_HOT_STANDBY))

    g_log.info("Verifying that the old symbol has been removed from the "
               "metrics configuration")
    if options.oldSymbol in allSymbols:
      raise AssertionError(
        "Old symbol %r is still present in the metrics configuration"
        % (options.oldSymbol,))

    g_log.info("Verifying that the new symbol is present in the metrics "
               "configuration")
    if options.newSymbol not in allSymbols:
      raise AssertionError(
        "New symbol %r is missing from the metrics configuration"
        % (options.newSymbol,))

    g_log.info("Migrating BOTH twitter and xignite stock data from "
               "old-symbol=%s to new-symbol=%s",
               options.oldSymbol, options.newSymbol)

    # Rename the metrics in collectorsdb and forward new metric samples to HTM
    # Engine
    g_log.info("Modifying old metrics with new symbol")

    _resymbolTweetVolumeMetric(oldSymbol=options.oldSymbol,
                               newSymbol=options.newSymbol,
                               aggPeriod=options.aggPeriod)

    _resymbolStockMetrics(oldSymbol=options.oldSymbol,
                          newSymbol=options.newSymbol)

    # Delete metrics linked to old stock symbol from Taurus Engine
    delete_companies.deleteCompanies(
      tickerSymbols=[options.oldSymbol],
      engineServer=options.htmServer,
      engineApiKey=options.apikey,
      warnAboutDestructiveAction=False)
  except SystemExit as e:
    # A zero exit code (e.g. from --help) is not a failure; don't log it as one
    if e.code != 0:
      g_log.exception("Failed!")
    raise
  except Exception:
    g_log.exception("Failed!")
    raise
Exemplo n.º 2
0
def main():
    """Move collected stock data from an old ticker symbol to a new one.

    NOTE: main also serves as entry point for "console script" generated by
    setup

    The tool first validates its preconditions (all config sections in
    hot-standby opmode; old symbol gone from, and new symbol present in, the
    metrics configuration), then re-symbols the tweet volume and stock
    metrics, and finally deletes the old symbol through
    delete_companies.deleteCompanies.

    Preconditions are enforced with explicit ``raise AssertionError(...)``
    instead of ``assert`` statements: ``assert`` is compiled away under
    ``python -O``, which would let the destructive steps run unchecked.

    :raises AssertionError: when a precondition does not hold
    :raises SystemExit: re-raised as-is; logged only for non-zero exit codes
    :raises Exception: any other error is logged and re-raised
    """
    logging_support.LoggingSupport().initTool()

    try:
        options = _parseArgs()
        allSymbols = set(
            stockData["symbol"] for stockData in
            metric_utils.getMetricsConfiguration().itervalues())

        g_log.info("Verifying that agents are in hot_standby mode")
        for section in config.sections():
            sectionOpMode = config.get(section, "opmode")
            if sectionOpMode != config.OP_MODE_HOT_STANDBY:
                raise AssertionError(
                    "Config section %r has opmode=%r; expected %r"
                    % (section, sectionOpMode, config.OP_MODE_HOT_STANDBY))

        g_log.info("Verifying that the old symbol has been removed from the "
                   "metrics configuration")
        if options.oldSymbol in allSymbols:
            raise AssertionError(
                "Old symbol %r is still present in the metrics configuration"
                % (options.oldSymbol,))

        g_log.info("Verifying that the new symbol is present in the metrics "
                   "configuration")
        if options.newSymbol not in allSymbols:
            raise AssertionError(
                "New symbol %r is missing from the metrics configuration"
                % (options.newSymbol,))

        g_log.info(
            "Migrating BOTH twitter and xignite stock data from "
            "old-symbol=%s to new-symbol=%s", options.oldSymbol,
            options.newSymbol)

        # Rename the metrics in collectorsdb and forward new metric samples to
        # HTM Engine
        g_log.info("Modifying old metrics with new symbol")

        _resymbolTweetVolumeMetric(oldSymbol=options.oldSymbol,
                                   newSymbol=options.newSymbol,
                                   aggPeriod=options.aggPeriod)

        _resymbolStockMetrics(oldSymbol=options.oldSymbol,
                              newSymbol=options.newSymbol)

        # Delete metrics linked to old stock symbol from Taurus Engine
        delete_companies.deleteCompanies(tickerSymbols=[options.oldSymbol],
                                         engineServer=options.htmServer,
                                         engineApiKey=options.apikey,
                                         warnAboutDestructiveAction=False)
    except SystemExit as e:
        # Exit code 0 is a normal termination, so only non-zero is logged
        if e.code != 0:
            g_log.exception("Failed!")
        raise
    except Exception:
        g_log.exception("Failed!")
        raise
def main():
  """Run the security-news polling loop until interrupted.

  NOTE: main also serves as entry point for "console script" generated by setup

  Each pass collects headline and release tasks for all configured securities,
  processes them on a multiprocessing pool and, once the current aggregation
  deadline has passed, forwards news-volume metrics (only when the agent's
  opmode is active and this is not a dry run). A KeyboardInterrupt ends the
  loop; the pool is closed and terminated on every exit path.
  """
  logging_support.LoggingSupport.initService()

  args = _parseArgs()

  # Operating mode of this agent; see OP_MODE_ACTIVE, etc. in ApplicationConfig
  agentMode = config.get("xignite_security_news_agent", "opmode")
  g_log.info("Starting: opMode=%s", agentMode)

  periodSec = args.aggIntervalSec

  # Metric specs and securities both come from the metric configuration
  specs = _loadNewsVolumeMetricSpecs()
  trackedSecurities = getAllMetricSecurities()
  g_log.info("Collecting headlines and releases for %s", trackedSecurities)

  # Per-security datetime.date of the most recently stored headlines
  headlineEndDates = _querySecurityNewsEndDates(
    schema.xigniteSecurityHeadline)

  # Per-security datetime.date of the most recently stored releases
  releaseEndDates = _querySecurityNewsEndDates(
    schema.xigniteSecurityRelease)

  # Datetime of the last metric data batch that was emitted successfully
  lastEmitted = metric_utils.establishLastEmittedSampleDatetime(
    key=_EMITTED_NEWS_VOLUME_SAMPLE_TRACKER_KEY,
    aggSec=periodSec)

  # Next aggregation deadline: elapsed time since the last emitted batch,
  # rounded up to whole periods, plus one further period
  baseEpoch = date_time_utils.epochFromNaiveUTCDatetime(lastEmitted)
  elapsedPeriods = int((time.time() - baseEpoch + periodSec - 1) / periodSec)
  aggDeadline = baseEpoch + (elapsedPeriods * periodSec) + periodSec

  # Poll twice per aggregation period
  pollSec = periodSec / 2.0
  workerCount = max(_MIN_POOL_CONCURRENCY, multiprocessing.cpu_count())
  g_log.info("Entering main loop: pollingIntervalSec=%s; numPoolWorkers=%d",
             pollSec, workerCount)
  workers = multiprocessing.Pool(processes=workerCount)
  try:
    while True:
      cycleEnd = time.time() + pollSec

      # Gather every headline and release task of interest for this cycle
      headlineTasks = _generateTasks(trackedSecurities,
                                     headlineEndDates,
                                     args.backfillDays,
                                     taskClass=_HistoricalHeadlinesTask,
                                     dryRun=args.dryRun)
      releaseTasks = _generateTasks(trackedSecurities,
                                    releaseEndDates,
                                    args.backfillDays,
                                    taskClass=_HistoricalReleasesTask,
                                    dryRun=args.dryRun)

      _processNewsCollectionTasks(
        pool=workers,
        tasksIter=itertools.chain(headlineTasks, releaseTasks),
        headlineEndDates=headlineEndDates,
        releaseEndDates=releaseEndDates,
        options=args)

      # Once the deadline passes, aggregate and forward metric samples to
      # htmengine's Metric Listener; the deadline advances either way
      if time.time() >= aggDeadline:
        if agentMode == config.OP_MODE_ACTIVE and not args.dryRun:
          lastEmitted = _forwardNewsVolumeMetrics(
            metricSpecs=specs,
            lastEmittedAggTime=lastEmitted,
            stopDatetime=datetime.utcfromtimestamp(aggDeadline),
            periodSec=periodSec,
            metricDestAddr=args.metricDestAddr)

        aggDeadline += periodSec

      # Sleep out the rest of the polling interval, or report the overrun
      remaining = cycleEnd - time.time()
      if remaining < 0:
        g_log.warning("Processing exceeded pollingInterval=%ss by overage=%ss",
                      pollSec, -remaining)
      elif remaining > 0:
        g_log.info("Sleeping for %f seconds. zzzzzzzz...", remaining)
        time.sleep(remaining)
  except KeyboardInterrupt:
    # Include the traceback in the log to help debug deadlocks
    g_log.info("Observed KeyboardInterrupt", exc_info=True)
  finally:
    g_log.info("Closing multiprocessing.Pool")
    workers.close()

    g_log.info("Terminating multiprocessing.Pool")
    workers.terminate()
    g_log.info("Multiprocessing.Pool terminated")
Exemplo n.º 4
0
def main():
    """Service entry point: poll, store and emit security news volume.

    NOTE: main also serves as entry point for "console script" generated by
    setup

    Loops indefinitely. Every iteration builds headline and release collection
    tasks, hands them to a multiprocessing pool, and forwards aggregated
    news-volume metrics whenever the aggregation deadline has been reached
    (forwarding is skipped unless the opmode is active and dry-run is off).
    The loop stops on KeyboardInterrupt, and the pool is always closed and
    terminated before returning.
    """
    logging_support.LoggingSupport.initService()

    cliOptions = _parseArgs()

    # See OP_MODE_ACTIVE, etc. in ApplicationConfig
    operatingMode = config.get("xignite_security_news_agent", "opmode")
    g_log.info("Starting: opMode=%s", operatingMode)

    aggIntervalSec = cliOptions.aggIntervalSec

    # Metric specs, as loaded from the metric configuration
    newsVolumeSpecs = _loadNewsVolumeMetricSpecs()

    # Securities, as loaded from the metric configuration
    allSecurities = getAllMetricSecurities()
    g_log.info("Collecting headlines and releases for %s", allSecurities)

    # security symbol -> datetime.date of most recently-stored headlines
    headlineDates = _querySecurityNewsEndDates(schema.xigniteSecurityHeadline)

    # security symbol -> datetime.date of most recently-stored releases
    releaseDates = _querySecurityNewsEndDates(schema.xigniteSecurityRelease)

    # Establish/retrieve when a metric data batch was last emitted
    emittedThrough = metric_utils.establishLastEmittedSampleDatetime(
        key=_EMITTED_NEWS_VOLUME_SAMPLE_TRACKER_KEY,
        aggSec=aggIntervalSec)

    # Work out the next aggregation deadline relative to the last emitted
    # batch: whole periods elapsed so far (rounded up), then one more period
    startEpoch = date_time_utils.epochFromNaiveUTCDatetime(emittedThrough)
    wholePeriods = int(
        (time.time() - startEpoch + aggIntervalSec - 1) / aggIntervalSec)
    deadline = startEpoch + (wholePeriods * aggIntervalSec) + aggIntervalSec

    # Polling runs at half the aggregation interval
    pollIntervalSec = aggIntervalSec / 2.0
    poolSize = max(_MIN_POOL_CONCURRENCY, multiprocessing.cpu_count())
    g_log.info("Entering main loop: pollingIntervalSec=%s; numPoolWorkers=%d",
               pollIntervalSec, poolSize)
    taskPool = multiprocessing.Pool(processes=poolSize)
    try:
        while True:
            iterationEnd = time.time() + pollIntervalSec

            # Headline tasks first, then release tasks, chained into one
            # iterable for the collector
            pendingTasks = itertools.chain(
                _generateTasks(allSecurities,
                               headlineDates,
                               cliOptions.backfillDays,
                               taskClass=_HistoricalHeadlinesTask,
                               dryRun=cliOptions.dryRun),
                _generateTasks(allSecurities,
                               releaseDates,
                               cliOptions.backfillDays,
                               taskClass=_HistoricalReleasesTask,
                               dryRun=cliOptions.dryRun))

            _processNewsCollectionTasks(pool=taskPool,
                                        tasksIter=pendingTasks,
                                        headlineEndDates=headlineDates,
                                        releaseEndDates=releaseDates,
                                        options=cliOptions)

            # Aggregate and forward metric samples to htmengine's Metric
            # Listener when due; the deadline moves forward regardless
            if time.time() >= deadline:
                if (operatingMode == config.OP_MODE_ACTIVE
                        and not cliOptions.dryRun):
                    emittedThrough = _forwardNewsVolumeMetrics(
                        metricSpecs=newsVolumeSpecs,
                        lastEmittedAggTime=emittedThrough,
                        stopDatetime=datetime.utcfromtimestamp(deadline),
                        periodSec=aggIntervalSec,
                        metricDestAddr=cliOptions.metricDestAddr)

                deadline += aggIntervalSec

            # Wait for the remainder of the interval; warn if we overran it
            timeLeft = iterationEnd - time.time()
            if timeLeft > 0:
                g_log.info("Sleeping for %f seconds. zzzzzzzz...", timeLeft)
                time.sleep(timeLeft)
            elif timeLeft < 0:
                g_log.warning(
                    "Processing exceeded pollingInterval=%ss by overage=%ss",
                    pollIntervalSec, -timeLeft)
    except KeyboardInterrupt:
        # exc_info is logged to help debug deadlocks
        g_log.info("Observed KeyboardInterrupt", exc_info=True)
    finally:
        g_log.info("Closing multiprocessing.Pool")
        taskPool.close()

        g_log.info("Terminating multiprocessing.Pool")
        taskPool.terminate()
        g_log.info("Multiprocessing.Pool terminated")
Exemplo n.º 5
0
def main():
    """
  NOTE: main also serves as entry point for "console script" generated by setup
  """
    logging_support.LoggingSupport().initTool()

    try:
        options = _parseArgs()

        g_log.info("Verifying that agents are in hot_standby mode")
        for section in config.sections():
            try:
                assert config.get(section, "opmode") == ApplicationConfig.OP_MODE_HOT_STANDBY
            except Exception, e:
                raise

        g_log.info("Verifying that the old symbol has been removed from the " "metrics configuration")
        for stockData in metric_utils.getMetricsConfiguration().itervalues():
            assert stockData["symbol"] != options.old_symbol

        if options.twitter and (not options.stocks):
            g_log.info(
                "Migrating ONLY twitter data from old-symbol=%s " "to new-symbol=%s",
                options.old_symbol,
                options.new_symbol,
            )
        elif options.stocks and (not options.twitter):
            g_log.info(
                "Migrating ONLY xignite stock data from old-symbol=%s " "to new-symbol=%s",
                options.old_symbol,
                options.new_symbol,
            )
            raise NotImplementedError
        else:
            g_log.info(
                "Migrating BOTH twitter and xignite stock data from " "old-symbol=%s to new-symbol=%s",
                options.old_symbol,
                options.new_symbol,
            )
            raise NotImplementedError

        oldSymbolTweetPrefix = "TWITTER.TWEET.HANDLE.{symbol}.".format(symbol=options.old_symbol)
        newSymbolTweetPrefix = "TWITTER.TWEET.HANDLE.{symbol}.".format(symbol=options.new_symbol)
        oldSymbolTweetMetricsList = []

        with collectorsdb.engineFactory().begin() as conn:

            g_log.info("Renaming metrics to new symbol")
            if options.twitter:
                oldSymbolTweetsQuery = sql.select([tweetSamplesSchema]).where(
                    tweetSamplesSchema.c.metric.contains(oldSymbolTweetPrefix)
                )
                oldSymbolTweets = conn.execute(oldSymbolTweetsQuery)
                for tweetSample in oldSymbolTweets:
                    newMetricName = "{newPrefix}{metric}".format(
                        newPrefix=newSymbolTweetPrefix, metric=tweetSample.metric[len(oldSymbolTweetPrefix) :]
                    )
                    if tweetSample.metric not in oldSymbolTweetMetricsList:
                        oldSymbolTweetMetricsList.append(tweetSample.metric)

                    updateSampleQuery = (
                        tweetSamplesSchema.update()
                        .where(tweetSamplesSchema.c.seq == tweetSample.seq)
                        .values(metric=newMetricName)
                    )

                    conn.execute(updateSampleQuery)

            g_log.info("Forwarding new twitter metric data to Taurus engine...")
            if options.twitter:
                oldestRecordTs = conn.execute(
                    sql.select([tweetSamplesSchema.c.agg_ts], order_by=tweetSamplesSchema.c.agg_ts.asc())
                ).first()[0]
                lastEmittedAggTime = metric_utils.establishLastEmittedSampleDatetime(
                    key=_EMITTED_TWEET_VOLUME_SAMPLE_TRACKER_KEY, aggSec=options.aggPeriod
                )
                aggOffset = (
                    math.ceil(
                        (epochFromNaiveUTCDatetime(lastEmittedAggTime) - epochFromNaiveUTCDatetime(oldestRecordTs))
                        / options.aggPeriod
                    )
                    * options.aggPeriod
                )
                aggStartDatetime = (
                    lastEmittedAggTime - timedelta(seconds=aggOffset) - timedelta(seconds=options.aggPeriod)
                )

                metric_utils.updateLastEmittedSampleDatetime(
                    key=_EMITTED_TWEET_VOLUME_SAMPLE_TRACKER_KEY, sampleDatetime=aggStartDatetime
                )

                MetricDataForwarder.runInThread(
                    metricSpecs=loadMetricSpecs(),
                    aggSec=options.aggPeriod,
                    symbolList=[options.new_symbol],
                    forwardOnlyBacklog=True,
                )

                metric_utils.updateLastEmittedSampleDatetime(
                    key=_EMITTED_TWEET_VOLUME_SAMPLE_TRACKER_KEY, sampleDatetime=lastEmittedAggTime
                )

        g_log.info("Forwarding metrics to dynamodb using new symbol...")
        if options.twitter:
            migrate_tweets_to_dynamodb.main(symbolList=[options.new_symbol])

        g_log.info("Unmonitoring and deleting existing metrics associated with " "symbol=%s", options.old_symbol)
        oldModels = metric_utils.getSymbolModels(options.htmServer, options.apikey, options.old_symbol)
        for model in oldModels:
            metric_utils.unmonitorMetric(options.htmServer, options.apikey, model.uid)
            metric_utils.deleteMetric(options.htmServer, options.apikey, model.name)