Exemplo n.º 1
0
def _checkTwitterScreenNames(
        consumerKey,
        consumerSecret,
        accessToken,
        accessTokenSecret,
        errorReportEmailAwsRegion,
        errorReportEmailSesEndpoint,
        errorReportEmailSenderAddress,
        awsAccessKeyId,
        awsSecretAccessKey,
        errorReportEmailRecipients):
    """Verify that every configured twitter screen name still resolves.

    The screen names of all metric specs are lower-cased and handed to
    ``_resolveUnmappedScreenNames``. When some of them cannot be mapped, the
    failure is logged and passed to ``_reportUnmappedScreenNames`` together
    with the e-mail/AWS settings. Per the original documentation, successfully
    reported names are tracked in a table so the same name is not e-mailed
    twice. When every name resolves, ``_deleteScreenNameFailures`` is called
    to clear the rows of the twitter_handle_failures table.

    :param consumerKey: Twitter consumer key
    :param consumerSecret: Twitter consumer secret
    :param accessToken: Twitter access token
    :param accessTokenSecret: Twitter access token secret
    :param errorReportEmailAwsRegion: AWS region for error report email
    :type errorReportEmailAwsRegion: string
    :param errorReportEmailSesEndpoint: AWS/SES endpoint for error report email
    :type errorReportEmailSesEndpoint: string
    :param errorReportEmailSenderAddress: Sender address for error report email
    :type errorReportEmailSenderAddress: string
    :param awsAccessKeyId: AWS access key ID for error report email
    :type awsAccessKeyId: string
    :param awsSecretAccessKey: AWS secret access key for error report email
    :type awsSecretAccessKey: string
    :param errorReportEmailRecipients: Recipients of the error report email
    :type errorReportEmailRecipients: list of strings
    """
    oauth = tweepy.OAuthHandler(consumerKey, consumerSecret)
    oauth.set_access_token(accessToken, accessTokenSecret)
    api = tweepy.API(oauth)

    # Flatten the screen names of all metric specs, normalized to lower case
    lowerCasedNames = [
        name.lower()
        for metricSpec in loadMetricSpecs()
        for name in metricSpec.screenNames
    ]

    unresolved = _resolveUnmappedScreenNames(api, lowerCasedNames)

    if not unresolved:
        # Nothing is broken: clear rows of twitter_handle_failures table
        _deleteScreenNameFailures()
        g_log.info("All screen names resolved successfully")
        return

    g_log.error("No mappings for screenNames=%s", unresolved)

    _reportUnmappedScreenNames(
        unmappedScreenNames=unresolved,
        awsRegion=errorReportEmailAwsRegion,
        sesEndpoint=errorReportEmailSesEndpoint,
        senderAddress=errorReportEmailSenderAddress,
        awsAccessKeyId=awsAccessKeyId,
        awsSecretAccessKey=awsSecretAccessKey,
        recipients=errorReportEmailRecipients)
def _checkTwitterScreenNames(consumerKey, consumerSecret, accessToken,
                             accessTokenSecret, errorReportEmailAwsRegion,
                             errorReportEmailSesEndpoint,
                             errorReportEmailSenderAddress, awsAccessKeyId,
                             awsSecretAccessKey, errorReportEmailRecipients):
    """Check that the twitter screen names of all metric specs are still valid.

    Screen names that can no longer be mapped are logged as an error and
    forwarded to ``_reportUnmappedScreenNames`` for e-mail notification. Per
    the original documentation, names that were reported successfully are
    remembered in a table to avoid duplicate e-mails. If no screen name is
    unmapped, the previously recorded failures are removed via
    ``_deleteScreenNameFailures``.

    :param consumerKey: Twitter consumer key
    :param consumerSecret: Twitter consumer secret
    :param accessToken: Twitter access token
    :param accessTokenSecret: Twitter access token secret
    :param errorReportEmailAwsRegion: AWS region for error report email
    :type errorReportEmailAwsRegion: string
    :param errorReportEmailSesEndpoint: AWS/SES endpoint for error report email
    :type errorReportEmailSesEndpoint: string
    :param errorReportEmailSenderAddress: Sender address for error report email
    :type errorReportEmailSenderAddress: string
    :param awsAccessKeyId: AWS access key ID for error report email
    :type awsAccessKeyId: string
    :param awsSecretAccessKey: AWS secret access key for error report email
    :type awsSecretAccessKey: string
    :param errorReportEmailRecipients: Recipients of the error report email
    :type errorReportEmailRecipients: list of strings
    """
    # Authenticated twitter API client
    oauthHandler = tweepy.OAuthHandler(consumerKey, consumerSecret)
    oauthHandler.set_access_token(accessToken, accessTokenSecret)
    twitterClient = tweepy.API(oauthHandler)

    # Collect the lower-cased screen names of every metric spec
    allScreenNames = []
    for metricSpec in loadMetricSpecs():
        allScreenNames.extend(name.lower() for name in metricSpec.screenNames)

    unmapped = _resolveUnmappedScreenNames(twitterClient, allScreenNames)

    if not unmapped:
        # clearing rows of twitter_handle_failures table
        _deleteScreenNameFailures()
        g_log.info("All screen names resolved successfully")
    else:
        g_log.error("No mappings for screenNames=%s", unmapped)

        # Settings needed by the reporter to send the notification e-mail
        emailSettings = dict(
            awsRegion=errorReportEmailAwsRegion,
            sesEndpoint=errorReportEmailSesEndpoint,
            senderAddress=errorReportEmailSenderAddress,
            awsAccessKeyId=awsAccessKeyId,
            awsSecretAccessKey=awsSecretAccessKey,
            recipients=errorReportEmailRecipients)
        _reportUnmappedScreenNames(unmappedScreenNames=unmapped,
                                   **emailSettings)
Exemplo n.º 3
0
def main():
    """
  NOTE: main also serves as entry point for "console script" generated by setup
  """
    # Tool entry point: migrates data collected under options.old_symbol to
    # options.new_symbol. The visible steps are: sanity checks, renaming tweet
    # sample metrics in collectorsdb, re-forwarding the renamed samples to the
    # Taurus engine, forwarding tweets to dynamodb, and finally unmonitoring
    # and deleting the models associated with the old symbol.
    logging_support.LoggingSupport().initTool()

    try:
        options = _parseArgs()

        # Pre-flight check: every section of `config` must report the
        # hot-standby operation mode.
        # NOTE(review): the inner try/except re-raises unchanged, so it behaves
        # exactly like a bare assert; `except Exception, e` is Python 2-only
        # syntax and `e` is unused.
        # NOTE(review): assert statements are skipped when Python runs with -O,
        # which would silently disable this check and the one below.
        g_log.info("Verifying that agents are in hot_standby mode")
        for section in config.sections():
            try:
                assert config.get(section, "opmode") == ApplicationConfig.OP_MODE_HOT_STANDBY
            except Exception, e:
                raise

        # Pre-flight check: no entry of the metrics configuration may still
        # reference the old symbol.
        g_log.info("Verifying that the old symbol has been removed from the " "metrics configuration")
        for stockData in metric_utils.getMetricsConfiguration().itervalues():
            assert stockData["symbol"] != options.old_symbol

        # Only the twitter-only migration is implemented; the stocks-only and
        # the combined (or neither flag set) cases log their intent and then
        # raise NotImplementedError.
        if options.twitter and (not options.stocks):
            g_log.info(
                "Migrating ONLY twitter data from old-symbol=%s " "to new-symbol=%s",
                options.old_symbol,
                options.new_symbol,
            )
        elif options.stocks and (not options.twitter):
            g_log.info(
                "Migrating ONLY xignite stock data from old-symbol=%s " "to new-symbol=%s",
                options.old_symbol,
                options.new_symbol,
            )
            raise NotImplementedError
        else:
            g_log.info(
                "Migrating BOTH twitter and xignite stock data from " "old-symbol=%s to new-symbol=%s",
                options.old_symbol,
                options.new_symbol,
            )
            raise NotImplementedError

        # Metric-name prefixes used to locate the old symbol's tweet metrics and
        # to build the corresponding new names.
        oldSymbolTweetPrefix = "TWITTER.TWEET.HANDLE.{symbol}.".format(symbol=options.old_symbol)
        newSymbolTweetPrefix = "TWITTER.TWEET.HANDLE.{symbol}.".format(symbol=options.new_symbol)
        # NOTE(review): this list is filled below but never read in the visible
        # part of this function.
        oldSymbolTweetMetricsList = []

        # NOTE(review): presumably a SQLAlchemy engine, in which case begin()
        # wraps the block in a transaction -- confirm against collectorsdb.
        with collectorsdb.engineFactory().begin() as conn:

            g_log.info("Renaming metrics to new symbol")
            if options.twitter:
                # NOTE(review): contains() matches the prefix anywhere in the
                # metric name, while the slice below assumes the name starts
                # with it -- startswith() may have been intended; confirm.
                oldSymbolTweetsQuery = sql.select([tweetSamplesSchema]).where(
                    tweetSamplesSchema.c.metric.contains(oldSymbolTweetPrefix)
                )
                oldSymbolTweets = conn.execute(oldSymbolTweetsQuery)
                for tweetSample in oldSymbolTweets:
                    # New name = new prefix + whatever followed the old prefix
                    newMetricName = "{newPrefix}{metric}".format(
                        newPrefix=newSymbolTweetPrefix, metric=tweetSample.metric[len(oldSymbolTweetPrefix) :]
                    )
                    # Remember each distinct old metric name once
                    if tweetSample.metric not in oldSymbolTweetMetricsList:
                        oldSymbolTweetMetricsList.append(tweetSample.metric)

                    # Rows are updated one at a time, keyed by their seq value
                    updateSampleQuery = (
                        tweetSamplesSchema.update()
                        .where(tweetSamplesSchema.c.seq == tweetSample.seq)
                        .values(metric=newMetricName)
                    )

                    conn.execute(updateSampleQuery)

            g_log.info("Forwarding new twitter metric data to Taurus engine...")
            if options.twitter:
                # Earliest agg_ts in the table; the query has no metric filter,
                # so it spans all metrics.
                # NOTE(review): if the query returned no rows, first() would
                # presumably be None and the [0] subscript would raise.
                oldestRecordTs = conn.execute(
                    sql.select([tweetSamplesSchema.c.agg_ts], order_by=tweetSamplesSchema.c.agg_ts.asc())
                ).first()[0]
                lastEmittedAggTime = metric_utils.establishLastEmittedSampleDatetime(
                    key=_EMITTED_TWEET_VOLUME_SAMPLE_TRACKER_KEY, aggSec=options.aggPeriod
                )
                # Span between the last emitted sample and the oldest record,
                # rounded up to a whole number of aggregation periods.
                # NOTE(review): under Python 2 the `/` below floors when both
                # operands are ints, making ceil() a no-op -- confirm the type
                # returned by epochFromNaiveUTCDatetime.
                aggOffset = (
                    math.ceil(
                        (epochFromNaiveUTCDatetime(lastEmittedAggTime) - epochFromNaiveUTCDatetime(oldestRecordTs))
                        / options.aggPeriod
                    )
                    * options.aggPeriod
                )
                # Start one extra aggregation period before the rounded offset
                aggStartDatetime = (
                    lastEmittedAggTime - timedelta(seconds=aggOffset) - timedelta(seconds=options.aggPeriod)
                )

                # Temporarily rewind the emitted-sample tracker to the computed
                # start, forward the backlog for the new symbol, then restore
                # the tracker to its previous value.
                metric_utils.updateLastEmittedSampleDatetime(
                    key=_EMITTED_TWEET_VOLUME_SAMPLE_TRACKER_KEY, sampleDatetime=aggStartDatetime
                )

                # NOTE(review): whether runInThread blocks until forwarding has
                # completed is not visible here; if it does not, the tracker is
                # restored below while forwarding is still in progress.
                MetricDataForwarder.runInThread(
                    metricSpecs=loadMetricSpecs(),
                    aggSec=options.aggPeriod,
                    symbolList=[options.new_symbol],
                    forwardOnlyBacklog=True,
                )

                metric_utils.updateLastEmittedSampleDatetime(
                    key=_EMITTED_TWEET_VOLUME_SAMPLE_TRACKER_KEY, sampleDatetime=lastEmittedAggTime
                )

        g_log.info("Forwarding metrics to dynamodb using new symbol...")
        if options.twitter:
            migrate_tweets_to_dynamodb.main(symbolList=[options.new_symbol])

        # Unmonitor and then delete every model returned for the old symbol
        g_log.info("Unmonitoring and deleting existing metrics associated with " "symbol=%s", options.old_symbol)
        oldModels = metric_utils.getSymbolModels(options.htmServer, options.apikey, options.old_symbol)
        for model in oldModels:
            metric_utils.unmonitorMetric(options.htmServer, options.apikey, model.uid)
            metric_utils.deleteMetric(options.htmServer, options.apikey, model.name)
        # NOTE(review): the `try:` opened at the top of this function has no
        # `except`/`finally` clause in the visible source; the remainder of the
        # function appears to be cut off.
Exemplo n.º 4
0
def _resymbolTweetVolumeMetric(oldSymbol, newSymbol, aggPeriod):
    """Re-symbol a tweet volume metric from ``oldSymbol`` to ``newSymbol``.

    The workflow consists of the following steps:
      1. Reassign buffered tweet samples in collectorsdb to the new metric.
      2. Forward the new metric data samples to HTM Engine
      3. Forward the tweet media to dynamodb

    Steps 2 and 3 are skipped (the function returns early) when no
    last-emitted sample datetime has been recorded yet.

    :param str oldSymbol: old stock symbol, upper case
    :param str newSymbol: new stock symbol, upper case
    :param int aggPeriod: metric aggregation period in seconds
    :raises AssertionError: if a sample of the new metric already exists at or
      before the most recent sample of the old metric (the two would overlap)
    """
    g_log.info(
        "Renaming tweet sample metric: oldSymbol=%s, newSymbol=%s, aggPeriod=%s",
        oldSymbol, newSymbol, aggPeriod)

    oldMetricName = gen_metrics_config.getTweetVolumeMetricName(oldSymbol)
    newMetricName = gen_metrics_config.getTweetVolumeMetricName(newSymbol)

    sqlEngine = collectorsdb.engineFactory()

    # Rename the metric in tweet sample rows

    with sqlEngine.begin() as conn:
        # Verify that metric samples with new symbol don't overlap with
        # samples corresponding to the old symbol
        g_log.info(
            "Verifying that newMetric=%s in table=%s doesn't overlap with "
            "the oldMetric=%s.", newMetricName, schema.twitterTweetSamples,
            oldMetricName)

        # Restrict the max to the OLD metric's rows. Without this filter the
        # max spans every metric in the table (including the new one), so any
        # pre-existing new-metric row would be reported as an overlap.
        maxOldMetricAggTimestamp = conn.execute(
            sql.select([sql.func.max(schema.twitterTweetSamples.c.agg_ts)])
            .where(schema.twitterTweetSamples.c.metric == oldMetricName)
        ).scalar()

        # None means there are no old-metric rows, hence nothing to overlap
        if maxOldMetricAggTimestamp is not None:
            overlappingRow = conn.execute(
                sql.select([schema.twitterTweetSamples.c.metric])
                .where(schema.twitterTweetSamples.c.metric == newMetricName)
                .where(schema.twitterTweetSamples.c.agg_ts <=
                       maxOldMetricAggTimestamp)
                .order_by(schema.twitterTweetSamples.c.agg_ts.asc())
                .limit(1)).first()
            # Raised explicitly rather than via `assert` so that the guard
            # stays active when Python runs with -O
            if overlappingRow is not None:
                raise AssertionError(overlappingRow)

        # Re-symbol the tweet sample metric rows
        g_log.info("Renaming tweet sample metric %s with %s", oldMetricName,
                   newMetricName)
        conn.execute(
            schema.twitterTweetSamples  # pylint: disable=E1120
            .update()
            .where(schema.twitterTweetSamples.c.metric == oldMetricName)
            .values(metric=newMetricName))

    # Forward tweet metric samples to Taurus Engine

    g_log.info("Forwarding new tweet metric=%s samples to Taurus engine...",
               newMetricName)

    # Get the aggregation timestamp of the starting tweet sample to forward
    #
    # NOTE: prior to March 2015, tweet samples didn't have a consistent
    # reference between twitter agent's restarts. This issue was addressed
    # with the introduction of emitted_sample_tracker table.
    #
    timestampScanLowerBound = (datetime.utcnow() -
                               timedelta(days=MAX_METRIC_SAMPLE_BACKLOG_DAYS))

    # Oldest new-metric sample inside the backlog window.
    # NOTE(review): scalar() yields None when no row matches; that value is
    # passed on to aggregateAndForward unchanged -- confirm it is handled.
    aggStartDatetime = sqlEngine.execute(
        sql.select([schema.twitterTweetSamples.c.agg_ts],
                   order_by=schema.twitterTweetSamples.c.agg_ts.asc())
        .where(schema.twitterTweetSamples.c.metric == newMetricName)
        .where(schema.twitterTweetSamples.c.agg_ts > timestampScanLowerBound)
        .limit(1)).scalar()

    # Get the timestamp of the most recent sample batch emitted to Taurus
    # engine
    lastEmittedAggTime = metric_utils.queryLastEmittedSampleDatetime(
        key=_EMITTED_TWEET_VOLUME_SAMPLE_TRACKER_KEY)

    if lastEmittedAggTime is None:
        # Last emitted sample datetime has not been established yet; we'll
        # rely on the twitter agent to forward all metric samples to HTM
        # engine
        g_log.info(
            "Last emitted sample datetime has not been established yet; "
            "deferring metric sample forwarding to Twitter Agent.")
        return

    metricDataForwarder = twitter_direct_agent.MetricDataForwarder(
        metricSpecs=twitter_direct_agent.loadMetricSpecs(), aggSec=aggPeriod)

    # Forward up to one aggregation period past the last emitted batch
    metricDataForwarder.aggregateAndForward(
        aggStartDatetime=aggStartDatetime,
        stopDatetime=lastEmittedAggTime + timedelta(seconds=aggPeriod),
        metrics=[newMetricName])

    # Forward tweet media to dynamodb
    g_log.info("Forwarding twitter tweets to dynamodb using new symbol...")
    migrate_tweets_to_dynamodb.migrate(metrics=[newMetricName])
def _resymbolTweetVolumeMetric(oldSymbol, newSymbol, aggPeriod):
  """ Perform the workflow of resymboling a tweet volume metric that consists of
  the following steps:
    1. Reassign buffered tweet samples in collectorsdb to the new metric.
    2. Forward the new metric data samples to HTM Engine
    3. Forward the tweet media to dynamodb

  Steps 2 and 3 are skipped (the function returns early) when no last-emitted
  sample datetime has been recorded yet.

  :param str oldSymbol: old stock symbol, upper case
  :param str newSymbol: new stock symbol, upper case
  :param int aggPeriod: metric aggregation period in seconds
  :raises AssertionError: if a sample of the new metric already exists at or
    before the most recent sample of the old metric (the two would overlap)
  """
  g_log.info(
    "Renaming tweet sample metric: oldSymbol=%s, newSymbol=%s, aggPeriod=%s",
    oldSymbol, newSymbol, aggPeriod)

  oldMetricName = gen_metrics_config.getTweetVolumeMetricName(oldSymbol)
  newMetricName = gen_metrics_config.getTweetVolumeMetricName(newSymbol)

  sqlEngine = collectorsdb.engineFactory()

  # Rename the metric in tweet sample rows

  with sqlEngine.begin() as conn:
    # Verify that metric samples with new symbol don't overlap with samples
    # corresponding to the old symbol
    g_log.info("Verifying that newMetric=%s in table=%s doesn't overlap with "
               "the oldMetric=%s.",
               newMetricName, schema.twitterTweetSamples, oldMetricName)

    # Restrict the max to the OLD metric's rows. Without this filter the max
    # spans every metric in the table (including the new one), so any
    # pre-existing new-metric row would be reported as an overlap.
    maxOldMetricAggTimestamp = conn.execute(
      sql.select([sql.func.max(schema.twitterTweetSamples.c.agg_ts)])
      .where(schema.twitterTweetSamples.c.metric == oldMetricName)
    ).scalar()

    # None means there are no old-metric rows, hence nothing to overlap with
    if maxOldMetricAggTimestamp is not None:
      overlappingRow = conn.execute(
        sql.select([schema.twitterTweetSamples.c.metric])
        .where(schema.twitterTweetSamples.c.metric == newMetricName)
        .where(schema.twitterTweetSamples.c.agg_ts <= maxOldMetricAggTimestamp)
        .order_by(schema.twitterTweetSamples.c.agg_ts.asc())
        .limit(1)).first()
      # Raised explicitly rather than via `assert` so that the guard stays
      # active when Python runs with -O
      if overlappingRow is not None:
        raise AssertionError(overlappingRow)

    # Re-symbol the tweet sample metric rows
    g_log.info("Renaming tweet sample metric %s with %s",
               oldMetricName, newMetricName)
    conn.execute(
      schema.twitterTweetSamples  # pylint: disable=E1120
      .update()
      .where(schema.twitterTweetSamples.c.metric == oldMetricName)
      .values(metric=newMetricName))


  # Forward tweet metric samples to Taurus Engine

  g_log.info("Forwarding new tweet metric=%s samples to Taurus engine...",
             newMetricName)

  # Get the aggregation timestamp of the starting tweet sample to forward
  #
  # NOTE: prior to March 2015, tweet samples didn't have a consistent reference
  # between twitter agent's restarts. This issue was addressed with the
  # introduction of emitted_sample_tracker table.
  #
  timestampScanLowerBound = (datetime.utcnow() -
                             timedelta(days=MAX_METRIC_SAMPLE_BACKLOG_DAYS))

  # Oldest new-metric sample inside the backlog window.
  # NOTE(review): scalar() yields None when no row matches; that value is
  # passed on to aggregateAndForward unchanged -- confirm it is handled.
  aggStartDatetime = sqlEngine.execute(
    sql.select([schema.twitterTweetSamples.c.agg_ts],
               order_by=schema.twitterTweetSamples.c.agg_ts.asc())
    .where(schema.twitterTweetSamples.c.metric == newMetricName)
    .where(schema.twitterTweetSamples.c.agg_ts > timestampScanLowerBound)
    .limit(1)).scalar()

  # Get the timestamp of the most recent sample batch emitted to Taurus engine
  lastEmittedAggTime = metric_utils.queryLastEmittedSampleDatetime(
    key=_EMITTED_TWEET_VOLUME_SAMPLE_TRACKER_KEY)

  if lastEmittedAggTime is None:
    # Last emitted sample datetime has not been established yet; we'll rely
    # on the twitter agent to forward all metric samples to HTM engine
    g_log.info("Last emitted sample datetime has not been established yet; "
               "deferring metric sample forwarding to Twitter Agent.")
    return

  metricDataForwarder = twitter_direct_agent.MetricDataForwarder(
    metricSpecs=twitter_direct_agent.loadMetricSpecs(),
    aggSec=aggPeriod)

  # Forward up to one aggregation period past the last emitted batch
  metricDataForwarder.aggregateAndForward(
    aggStartDatetime=aggStartDatetime,
    stopDatetime=lastEmittedAggTime + timedelta(seconds=aggPeriod),
    metrics=[newMetricName])


  # Forward tweet media to dynamodb
  g_log.info("Forwarding twitter tweets to dynamodb using new symbol...")
  migrate_tweets_to_dynamodb.migrate(metrics=[newMetricName])