def testEmittedSampleDatetime(self):
    """Exercise the establish/update/query cycle for an emitted-sample tracker.

    Calls establishLastEmittedSampleDatetime() for a throwaway key, overwrites
    the tracked value with the current UTC time truncated to whole seconds,
    and verifies that queryLastEmittedSampleDatetime() returns exactly that
    value and that it is later than the initially-established one.
    """
    trackerKey = "bogus-test-key"

    # Initial value for the key. NOTE(review): the second positional argument
    # is presumably the aggregation period in seconds -- confirm against
    # metric_utils.
    initialDatetime = metric_utils.establishLastEmittedSampleDatetime(
      trackerKey, 300)

    # Register deletion of this key's tracker row(s) so that the entry does
    # not outlive the test, whatever the outcome of the assertions below
    runStatement = collectorsdb.engineFactory().execute
    trackerTable = schema.emittedSampleTracker
    self.addCleanup(
      runStatement,
      trackerTable.delete().where(trackerTable.c.key == trackerKey))

    self.assertIsInstance(initialDatetime, datetime)

    # Overwrite the tracked value with the current time (whole seconds only)
    currentDatetime = datetime.utcnow().replace(microsecond=0)
    metric_utils.updateLastEmittedSampleDatetime(trackerKey, currentDatetime)

    # Read the value back and compare it against both reference points
    storedDatetime = metric_utils.queryLastEmittedSampleDatetime(trackerKey)

    self.assertEqual(currentDatetime, storedDatetime)
    self.assertLess(initialDatetime, storedDatetime)
Пример #2
0
    def testEmittedSampleDatetime(self):
        """Verify that an updated emitted-sample datetime can be read back.

        The test establishes a tracker value for a scratch key, replaces it
        with "now" (UTC, microseconds zeroed), then checks that the queried
        value matches what was written and is greater than the value that was
        established first.
        """
        sampleKey = "bogus-test-key"

        # Step 1: obtain the starting datetime for the scratch key
        established = metric_utils.establishLastEmittedSampleDatetime(
            sampleKey, 300)

        # Arrange for the scratch key's row(s) to be deleted once the test
        # finishes
        execute = collectorsdb.engineFactory().execute
        deleteStmt = schema.emittedSampleTracker.delete().where(
            schema.emittedSampleTracker.c.key == sampleKey)
        self.addCleanup(execute, deleteStmt)

        self.assertIsInstance(established, datetime)

        # Step 2: move the tracked datetime forward to the present
        updated = datetime.utcnow().replace(microsecond=0)
        metric_utils.updateLastEmittedSampleDatetime(sampleKey, updated)

        # Step 3: the stored value must be the one just written and must be
        # later than the established one
        fetched = metric_utils.queryLastEmittedSampleDatetime(sampleKey)

        self.assertEqual(updated, fetched)
        self.assertLess(established, fetched)
Пример #3
0
  def testEstablishLastEmittedSampleDatetime(self, collectorsdbMock,
                                             datetimeMock):
    """Check establishLastEmittedSampleDatetime() with and without a stored value.

    First pass: with a mocked engine, the result must be the scalar produced
    by the engine's execute() call. Second pass: with
    queryLastEmittedSampleDatetime() patched to return None, the first
    statement handed to execute() must be an Insert whose "sample_ts" equals
    the mocked current time minus 300 seconds, with zero microseconds, and
    the patched query must have been called twice.

    :param collectorsdbMock: mock standing in for the collectorsdb module
    :param datetimeMock: mock standing in for datetime; its utcnow() is pinned
    """
    frozenNow = datetime(1997, 8, 29, 2, 14)
    datetimeMock.utcnow.return_value = frozenNow

    # Pass 1: queryLastEmittedSampleDatetime() yields a non-None result

    engineMock = Mock(spec_set=sqlalchemy.engine.Engine)
    collectorsdbMock.engineFactory.return_value = engineMock
    executeMock = engineMock.execute

    established = metric_utils.establishLastEmittedSampleDatetime(
      "twitter-tweets-volume", 300)

    self.assertEqual(established,
                     executeMock.return_value.scalar.return_value)

    # Pass 2: queryLastEmittedSampleDatetime() yields None

    executeMock.reset_mock()
    queryTarget = ("taurus.metric_collectors.metric_utils"
                   ".queryLastEmittedSampleDatetime")
    with patch(queryTarget) as queryMock:
      queryMock.return_value = None
      metric_utils.establishLastEmittedSampleDatetime(
        "twitter-tweets-volume", 300)

      # Positional arguments of the first execute() call made after the reset
      firstCallArgs, _ = executeMock.call_args_list[0]

      boundParams = firstCallArgs[0].compile().construct_params()
      sampleTs = boundParams["sample_ts"]

      self.assertEqual(sampleTs.microsecond, 0)
      self.assertEqual(frozenNow - sampleTs, timedelta(seconds=300))
      self.assertEqual(queryMock.call_count, 2)
      self.assertTrue(firstCallArgs)
      self.assertIsInstance(firstCallArgs[0], sqlalchemy.sql.dml.Insert)
Пример #4
0
    def testEstablishLastEmittedSampleDatetime(self, collectorsdbMock,
                                               datetimeMock):
        """Cover both outcomes of the lookup inside establishLastEmittedSampleDatetime().

        Scenario A (lookup result is not None): the function's return value
        is compared with the scalar of the mocked engine's execute() result.

        Scenario B (lookup patched to return None): the first execute() call
        is inspected; it must carry an Insert whose "sample_ts" parameter has
        no microseconds and lies exactly 300 seconds before the mocked
        utcnow(). The patched lookup is expected to be called twice.

        :param collectorsdbMock: replacement for the collectorsdb module
        :param datetimeMock: replacement for datetime with a fixed utcnow()
        """
        pinnedUtcNow = datetime(1997, 8, 29, 2, 14)
        datetimeMock.utcnow.return_value = pinnedUtcNow

        metricKey = "twitter-tweets-volume"
        periodSec = 300

        # Scenario A

        collectorsdbMock.engineFactory.return_value = Mock(
            spec_set=sqlalchemy.engine.Engine)
        engineExecute = collectorsdbMock.engineFactory.return_value.execute

        outcome = metric_utils.establishLastEmittedSampleDatetime(
            metricKey, periodSec)

        expectedScalar = engineExecute.return_value.scalar.return_value
        self.assertEqual(outcome, expectedScalar)

        # Scenario B

        engineExecute.reset_mock()
        with patch("taurus.metric_collectors.metric_utils"
                   ".queryLastEmittedSampleDatetime") as lookupMock:
            lookupMock.return_value = None
            metric_utils.establishLastEmittedSampleDatetime(
                metricKey, periodSec)

            # call_args_list entries unpack into (positional, keyword) parts
            positional, _ = engineExecute.call_args_list[0]
            statement = positional[0]

            compiledParams = statement.compile().construct_params()

            self.assertEqual(compiledParams["sample_ts"].microsecond, 0)
            self.assertEqual(pinnedUtcNow - compiledParams["sample_ts"],
                             timedelta(seconds=300))
            self.assertEqual(lookupMock.call_count, 2)
            self.assertTrue(positional)
            self.assertIsInstance(positional[0], sqlalchemy.sql.dml.Insert)
def main():
  """Run the security-news polling loop until interrupted.

  NOTE: main also serves as entry point for "console script" generated by setup

  Every pass builds headline and release collection tasks for the configured
  securities and hands them to a multiprocessing pool for processing. Once
  the current aggregation deadline has passed, news-volume metrics are
  forwarded, but only when the configured opmode is OP_MODE_ACTIVE and the
  dryRun option is off; the deadline then advances by one aggregation period.
  The pool is closed and terminated on the way out, including after a
  KeyboardInterrupt.
  """
  logging_support.LoggingSupport.initService()

  cmdOptions = _parseArgs()

  # See OP_MODE_ACTIVE, etc. in ApplicationConfig
  operatingMode = config.get("xignite_security_news_agent", "opmode")
  g_log.info("Starting: opMode=%s", operatingMode)

  aggPeriodSec = cmdOptions.aggIntervalSec

  # Metric specs and securities both come from the metric configuration
  newsVolumeSpecs = _loadNewsVolumeMetricSpecs()

  symbols = getAllMetricSecurities()
  g_log.info("Collecting headlines and releases for %s", symbols)

  # Per-security datetime.date of the most recently-stored headlines/releases
  headlineEndDateMap = _querySecurityNewsEndDates(
    schema.xigniteSecurityHeadline)
  releaseEndDateMap = _querySecurityNewsEndDates(
    schema.xigniteSecurityRelease)

  # Datetime of the last successfully-emitted metric data batch
  lastEmitted = metric_utils.establishLastEmittedSampleDatetime(
    key=_EMITTED_NEWS_VOLUME_SAMPLE_TRACKER_KEY,
    aggSec=aggPeriodSec)

  # First aggregation deadline: start from the last-emitted time, advance by
  # the number of periods needed to reach the present, then add one more
  # period
  baseEpoch = date_time_utils.epochFromNaiveUTCDatetime(lastEmitted)
  periodsToNow = int(
    (time.time() - baseEpoch + aggPeriodSec - 1) / aggPeriodSec)
  aggDeadline = baseEpoch + (periodsToNow * aggPeriodSec) + aggPeriodSec

  # Poll twice per aggregation period
  pollPeriodSec = aggPeriodSec / 2.0
  workerCount = max(_MIN_POOL_CONCURRENCY, multiprocessing.cpu_count())
  g_log.info("Entering main loop: pollingIntervalSec=%s; numPoolWorkers=%d",
             pollPeriodSec, workerCount)
  workerPool = multiprocessing.Pool(processes=workerCount)
  try:
    while True:
      pollDeadline = time.time() + pollPeriodSec

      # One task iterable per news kind: headlines first, then releases
      taskIters = [
        _generateTasks(
          symbols,
          endDateMap,
          cmdOptions.backfillDays,
          taskClass=newsTaskClass,
          dryRun=cmdOptions.dryRun)
        for endDateMap, newsTaskClass in (
          (headlineEndDateMap, _HistoricalHeadlinesTask),
          (releaseEndDateMap, _HistoricalReleasesTask))
      ]

      _processNewsCollectionTasks(
        pool=workerPool,
        tasksIter=itertools.chain(*taskIters),
        headlineEndDates=headlineEndDateMap,
        releaseEndDates=releaseEndDateMap,
        options=cmdOptions)

      # Aggregate and forward metric samples to htmengine's Metric Listener
      if time.time() >= aggDeadline:
        if operatingMode == config.OP_MODE_ACTIVE and not cmdOptions.dryRun:
          lastEmitted = _forwardNewsVolumeMetrics(
            metricSpecs=newsVolumeSpecs,
            lastEmittedAggTime=lastEmitted,
            stopDatetime=datetime.utcfromtimestamp(aggDeadline),
            periodSec=aggPeriodSec,
            metricDestAddr=cmdOptions.metricDestAddr)

        # The deadline advances whether or not anything was forwarded
        aggDeadline += aggPeriodSec

      # Sleep out the rest of the polling interval, or report the overrun
      remainingSec = pollDeadline - time.time()
      if remainingSec < 0:
        g_log.warning("Processing exceeded pollingInterval=%ss by overage=%ss",
                      pollPeriodSec, -remainingSec)
      elif remainingSec > 0:
        g_log.info("Sleeping for %f seconds. zzzzzzzz...", remainingSec)
        time.sleep(remainingSec)
  except KeyboardInterrupt:
    # Log with exception info to help debug deadlocks
    g_log.info("Observed KeyboardInterrupt", exc_info=True)
  finally:
    g_log.info("Closing multiprocessing.Pool")
    workerPool.close()

    g_log.info("Terminating multiprocessing.Pool")
    workerPool.terminate()
    g_log.info("Multiprocessing.Pool terminated")
Пример #6
0
def main():
    """Collect security news and periodically forward news-volume metrics.

    NOTE: main also serves as entry point for "console script" generated by
    setup

    After initializing logging and reading the command-line options and
    configuration, this function loops forever: it generates headline and
    release collection tasks, processes them through a multiprocessing pool,
    forwards aggregated metrics when an aggregation period has elapsed (only
    with opmode OP_MODE_ACTIVE and dryRun off), and then sleeps for whatever
    is left of the polling interval. KeyboardInterrupt ends the loop; the
    pool is always closed and terminated before returning.
    """
    logging_support.LoggingSupport.initService()

    parsedOptions = _parseArgs()

    # See OP_MODE_ACTIVE, etc. in ApplicationConfig
    agentOpMode = config.get("xignite_security_news_agent", "opmode")
    g_log.info("Starting: opMode=%s", agentOpMode)

    periodSec = parsedOptions.aggIntervalSec

    # Metric specs, as loaded from the metric configuration
    specs = _loadNewsVolumeMetricSpecs()

    # Securities, as loaded from the metric configuration
    securitySymbols = getAllMetricSecurities()
    g_log.info("Collecting headlines and releases for %s", securitySymbols)

    # security symbol -> datetime.date of most recently-stored headlines
    headlineEndDates = _querySecurityNewsEndDates(
        schema.xigniteSecurityHeadline)

    # security symbol -> datetime.date of most recently-stored releases
    releaseEndDates = _querySecurityNewsEndDates(
        schema.xigniteSecurityRelease)

    # Establish/retrieve datetime of last successfully-emitted batch
    emittedUpTo = metric_utils.establishLastEmittedSampleDatetime(
        key=_EMITTED_NEWS_VOLUME_SAMPLE_TRACKER_KEY, aggSec=periodSec)

    # Derive the end of the next aggregation period from the emitted time
    emittedEpoch = date_time_utils.epochFromNaiveUTCDatetime(emittedUpTo)
    wholePeriods = int((time.time() - emittedEpoch + periodSec - 1) /
                       periodSec)
    upcomingAggEnd = emittedEpoch + (wholePeriods * periodSec) + periodSec

    # Poll, store and emit samples
    pollSec = periodSec / 2.0
    poolSize = max(_MIN_POOL_CONCURRENCY, multiprocessing.cpu_count())
    g_log.info("Entering main loop: pollingIntervalSec=%s; numPoolWorkers=%d",
               pollSec, poolSize)
    taskPool = multiprocessing.Pool(processes=poolSize)
    try:
        while True:
            passEnd = time.time() + pollSec

            # Retrieve all headlines and releases of interest
            pendingHeadlines = _generateTasks(
                securitySymbols,
                headlineEndDates,
                parsedOptions.backfillDays,
                taskClass=_HistoricalHeadlinesTask,
                dryRun=parsedOptions.dryRun)
            pendingReleases = _generateTasks(
                securitySymbols,
                releaseEndDates,
                parsedOptions.backfillDays,
                taskClass=_HistoricalReleasesTask,
                dryRun=parsedOptions.dryRun)

            _processNewsCollectionTasks(
                pool=taskPool,
                tasksIter=itertools.chain(pendingHeadlines, pendingReleases),
                headlineEndDates=headlineEndDates,
                releaseEndDates=releaseEndDates,
                options=parsedOptions)

            # Aggregate and forward metric samples to htmengine's Metric
            # Listener once the aggregation period is over
            aggregationDue = time.time() >= upcomingAggEnd
            if aggregationDue:
                if (agentOpMode == config.OP_MODE_ACTIVE and
                        not parsedOptions.dryRun):
                    stopAt = datetime.utcfromtimestamp(upcomingAggEnd)
                    emittedUpTo = _forwardNewsVolumeMetrics(
                        metricSpecs=specs,
                        lastEmittedAggTime=emittedUpTo,
                        stopDatetime=stopAt,
                        periodSec=periodSec,
                        metricDestAddr=parsedOptions.metricDestAddr)

                # Move on to the next period even when nothing was forwarded
                upcomingAggEnd += periodSec

            # Positive slack means there is time to sleep; negative slack
            # means this pass overran the polling interval
            slackSec = passEnd - time.time()
            if slackSec > 0:
                g_log.info("Sleeping for %f seconds. zzzzzzzz...", slackSec)
                time.sleep(slackSec)
            elif slackSec < 0:
                g_log.warning(
                    "Processing exceeded pollingInterval=%ss by overage=%ss",
                    pollSec, -slackSec)
    except KeyboardInterrupt:
        # Log with exception info to help debug deadlocks
        g_log.info("Observed KeyboardInterrupt", exc_info=True)
    finally:
        g_log.info("Closing multiprocessing.Pool")
        taskPool.close()

        g_log.info("Terminating multiprocessing.Pool")
        taskPool.terminate()
        g_log.info("Multiprocessing.Pool terminated")
Пример #7
0
def main():
    """
  NOTE: main also serves as entry point for "console script" generated by setup
  """
    logging_support.LoggingSupport().initTool()

    try:
        options = _parseArgs()

        g_log.info("Verifying that agents are in hot_standby mode")
        for section in config.sections():
            try:
                assert config.get(section, "opmode") == ApplicationConfig.OP_MODE_HOT_STANDBY
            except Exception, e:
                raise

        g_log.info("Verifying that the old symbol has been removed from the " "metrics configuration")
        for stockData in metric_utils.getMetricsConfiguration().itervalues():
            assert stockData["symbol"] != options.old_symbol

        if options.twitter and (not options.stocks):
            g_log.info(
                "Migrating ONLY twitter data from old-symbol=%s " "to new-symbol=%s",
                options.old_symbol,
                options.new_symbol,
            )
        elif options.stocks and (not options.twitter):
            g_log.info(
                "Migrating ONLY xignite stock data from old-symbol=%s " "to new-symbol=%s",
                options.old_symbol,
                options.new_symbol,
            )
            raise NotImplementedError
        else:
            g_log.info(
                "Migrating BOTH twitter and xignite stock data from " "old-symbol=%s to new-symbol=%s",
                options.old_symbol,
                options.new_symbol,
            )
            raise NotImplementedError

        oldSymbolTweetPrefix = "TWITTER.TWEET.HANDLE.{symbol}.".format(symbol=options.old_symbol)
        newSymbolTweetPrefix = "TWITTER.TWEET.HANDLE.{symbol}.".format(symbol=options.new_symbol)
        oldSymbolTweetMetricsList = []

        with collectorsdb.engineFactory().begin() as conn:

            g_log.info("Renaming metrics to new symbol")
            if options.twitter:
                oldSymbolTweetsQuery = sql.select([tweetSamplesSchema]).where(
                    tweetSamplesSchema.c.metric.contains(oldSymbolTweetPrefix)
                )
                oldSymbolTweets = conn.execute(oldSymbolTweetsQuery)
                for tweetSample in oldSymbolTweets:
                    newMetricName = "{newPrefix}{metric}".format(
                        newPrefix=newSymbolTweetPrefix, metric=tweetSample.metric[len(oldSymbolTweetPrefix) :]
                    )
                    if tweetSample.metric not in oldSymbolTweetMetricsList:
                        oldSymbolTweetMetricsList.append(tweetSample.metric)

                    updateSampleQuery = (
                        tweetSamplesSchema.update()
                        .where(tweetSamplesSchema.c.seq == tweetSample.seq)
                        .values(metric=newMetricName)
                    )

                    conn.execute(updateSampleQuery)

            g_log.info("Forwarding new twitter metric data to Taurus engine...")
            if options.twitter:
                oldestRecordTs = conn.execute(
                    sql.select([tweetSamplesSchema.c.agg_ts], order_by=tweetSamplesSchema.c.agg_ts.asc())
                ).first()[0]
                lastEmittedAggTime = metric_utils.establishLastEmittedSampleDatetime(
                    key=_EMITTED_TWEET_VOLUME_SAMPLE_TRACKER_KEY, aggSec=options.aggPeriod
                )
                aggOffset = (
                    math.ceil(
                        (epochFromNaiveUTCDatetime(lastEmittedAggTime) - epochFromNaiveUTCDatetime(oldestRecordTs))
                        / options.aggPeriod
                    )
                    * options.aggPeriod
                )
                aggStartDatetime = (
                    lastEmittedAggTime - timedelta(seconds=aggOffset) - timedelta(seconds=options.aggPeriod)
                )

                metric_utils.updateLastEmittedSampleDatetime(
                    key=_EMITTED_TWEET_VOLUME_SAMPLE_TRACKER_KEY, sampleDatetime=aggStartDatetime
                )

                MetricDataForwarder.runInThread(
                    metricSpecs=loadMetricSpecs(),
                    aggSec=options.aggPeriod,
                    symbolList=[options.new_symbol],
                    forwardOnlyBacklog=True,
                )

                metric_utils.updateLastEmittedSampleDatetime(
                    key=_EMITTED_TWEET_VOLUME_SAMPLE_TRACKER_KEY, sampleDatetime=lastEmittedAggTime
                )

        g_log.info("Forwarding metrics to dynamodb using new symbol...")
        if options.twitter:
            migrate_tweets_to_dynamodb.main(symbolList=[options.new_symbol])

        g_log.info("Unmonitoring and deleting existing metrics associated with " "symbol=%s", options.old_symbol)
        oldModels = metric_utils.getSymbolModels(options.htmServer, options.apikey, options.old_symbol)
        for model in oldModels:
            metric_utils.unmonitorMetric(options.htmServer, options.apikey, model.uid)
            metric_utils.deleteMetric(options.htmServer, options.apikey, model.name)