def updateLastEmittedNonMetricSequence(key, seq):
  """ Update the last emitted sample timestamp value in the database for the
  Tweet Volume metrics

  :param str key: caller's key in schema.emittedNonMetricTracker
  :param int seq: sequence of last successfully-emitted non-metric
  """
  update = schema.emittedNonMetricTracker.update(  # pylint: disable=E1120
    ).values(
      last_seq=seq
    ).where(
      (schema.emittedNonMetricTracker.c.key == key)
    )

  result = collectorsdb.engineFactory().execute(update)

  # If update didn't find the key, then insert
  #
  # NOTE: sqlalchemy doesn't support "ON DUPLICATE KEY UPDATE" in its syntactic
  # sugar; see https://bitbucket.org/zzzeek/sqlalchemy/issue/960
  if result.rowcount == 0:
    # The row didn't exist, so create it
    collectorsdb.engineFactory().execute(
      schema.emittedNonMetricTracker.insert()  # pylint: disable=E1120
      .values(key=key, last_seq=seq))
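
# NOTE: A minimal alternative sketch, assuming SQLAlchemy >= 1.2 (newer than
# what this code targets): the MySQL dialect exposes ON DUPLICATE KEY UPDATE
# directly, so the update-then-insert sequence above could collapse into a
# single statement. This is illustrative only and not part of the original
# module.
from sqlalchemy.dialects.mysql import insert as mysqlInsert

def upsertLastEmittedNonMetricSequence(key, seq):
  """ Hypothetical single-statement upsert equivalent of the function above """
  ins = mysqlInsert(schema.emittedNonMetricTracker).values(key=key,
                                                           last_seq=seq)
  ins = ins.on_duplicate_key_update(last_seq=ins.inserted.last_seq)
  collectorsdb.engineFactory().execute(ins)
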
def establishLastEmittedSampleDatetime(key, aggSec):
  """ Query UTC timestamp of the last emitted sample batch; if one hasn't been
  saved yet, then synthesize one, using negative aggregation period offset
  from current time

  :param str key: caller's key in schema.emittedSampleTracker
  :param int aggSec: aggregation period in seconds
  :returns: (possibly synthesized) UTC timestamp of the last
    successfully-emitted sample batch
  :rtype: datetime.datetime
  """
  lastEmittedTimestamp = queryLastEmittedSampleDatetime(key)
  if lastEmittedTimestamp is not None:
    return lastEmittedTimestamp

  # Start at the present to avoid re-sending metric data that we may have
  # already sent to Taurus.
  lastEmittedTimestamp = (datetime.utcnow().replace(microsecond=0) -
                          timedelta(seconds=aggSec))
  collectorsdb.engineFactory().execute(
    schema.emittedSampleTracker.insert()  # pylint: disable=E1120
    .prefix_with("IGNORE", dialect="mysql")
    .values(key=key, sample_ts=lastEmittedTimestamp))

  # Query again after saving to account for mysql's loss of accuracy
  return queryLastEmittedSampleDatetime(key)
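
# Usage sketch of the emitted-sample tracker helpers defined in this module
# (the key below is an arbitrary example value and a 300-second aggregation
# period is assumed):
def _emittedSampleTrackerUsageExample():
  key = "my-emitter-key"
  lastEmitted = establishLastEmittedSampleDatetime(key, aggSec=300)
  # ... emit all samples newer than lastEmitted, then record the new
  # high-water mark ...
  updateLastEmittedSampleDatetime(key, datetime.utcnow().replace(microsecond=0))
  return queryLastEmittedSampleDatetime(key)
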
def _flagUnknownSymbolAsReported(symbol):
  """
  Flag unknown company symbol as reported in database

  :param str symbol: symbol of the company's security (e.g., "AAPL")
  """
  ins = schema.companySymbolFailures.insert(  # pylint: disable=E1120
    ).prefix_with('IGNORE', dialect="mysql").values(symbol=symbol)

  collectorsdb.engineFactory().execute(ins)

  g_log.debug("Saved unknown company symbol=%s", symbol)
  def testEngineFactorySingletonPattern(self):
    # Call collectorsdb.engineFactory()
    engine = collectorsdb.engineFactory()

    # Call collectorsdb.engineFactory() again and assert singleton
    engine2 = collectorsdb.engineFactory()
    self.assertIs(engine2, engine)

    # Call collectorsdb.engineFactory() in different process, assert raises
    # AssertionError
    with self.assertRaises(AssertionError):
      multiprocessing.Pool(processes=1).apply(collectorsdb.engineFactory)
  def testEmittedSampleDatetime(self):
    key = "bogus-test-key"

    # Establish initial sample datetime

    result = metric_utils.establishLastEmittedSampleDatetime(key, 300)

    # Cleanup
    self.addCleanup(
      collectorsdb.engineFactory().execute,
      schema.emittedSampleTracker.delete().where(  # pylint: disable=E1120
        (schema.emittedSampleTracker.c.key == key)
      )
    )

    self.assertIsInstance(result, datetime)

    # Update latest emitted sample datetime to now

    now = datetime.utcnow().replace(microsecond=0)
    metric_utils.updateLastEmittedSampleDatetime(key, now)

    # Verify that it was updated

    lastEmittedSample = metric_utils.queryLastEmittedSampleDatetime(key)

    self.assertEqual(now, lastEmittedSample)
    self.assertLess(result, lastEmittedSample)
def _purgeStaleDeletionRecords(limit):
  """ Delete stale rows in schema.twitterDeletion table

  :param limit: max records to purge per call
  :returns: a sequence of tweet_uid's of deleted schema.twitterDeletion rows
  """
  twitterDeletionSchema = collectorsdb.schema.twitterDeletion

  # NOTE: we first query the row id's to delete, so we can return them for
  # accountability and debugging
  rowsToDeleteSel = sqlalchemy.select(
    [twitterDeletionSchema.c.tweet_uid]).where(
      twitterDeletionSchema.c.created_at <
      sqlalchemy.func.date_sub(
        sqlalchemy.func.current_timestamp(),
        sqlalchemy.text("INTERVAL %i DAY" % (_DELETION_ROW_EXPIRY_DAYS,)))
      ).limit(limit)

  numDeleted = 0
  with collectorsdb.engineFactory().begin() as conn:
    rowIdsToDelete = tuple(
      str(row[0]) for row in conn.execute(rowsToDeleteSel).fetchall()
    )

    if rowIdsToDelete:
      deletion = twitterDeletionSchema.delete(  # pylint: disable=E1120
        ).where(twitterDeletionSchema.c.tweet_uid.in_(rowIdsToDelete))

      numDeleted = conn.execute(deletion).rowcount

  if len(rowIdsToDelete) != numDeleted:
    g_log.error("Expected to delete %d tweet delition request rows, but "
                "actually deleted %d rows", len(rowIdsToDelete), numDeleted)

  return rowIdsToDelete
def _saveScreenNameFailure(unmappedScreenName):
  """
  Save unmapped twitter handle in database

  :param unmappedScreenName: the twitter handle that is not valid anymore
  :type unmappedScreenName: string
  """

  ins = (collectorsdb.schema  # pylint: disable=E1120
         .twitterHandleFailures.insert()
         .prefix_with('IGNORE', dialect="mysql")
         .values(handle=unmappedScreenName))

  collectorsdb.engineFactory().execute(ins)

  g_log.info("Saved unmapped twitter handle; handle=%s", unmappedScreenName)
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument("--symbol", required=True)

  args = parser.parse_args()

  expectedAnswer = "Yes-%s" % (random.randint(1, 30),)

  with collectorsdb.engineFactory().begin() as conn:
    answer = raw_input(
      "Attention!  You are about to reset the emitted status for the \"{}\""
      " stock symbol at {}.\n"
      "\n"
      "To back out immediately without making any changes, feel free to type "
      "anything but \"{}\" in the prompt below, and press return.\n"
      "\n"
      "Are you sure you want to continue? ".format(args.symbol,
                                                   str(conn.engine),
                                                   str(expectedAnswer)))

    if answer.strip() != expectedAnswer:
      print "Aborting - Wise choice, my friend. Bye."
      return 1

    deleteFromEmitted(conn, schema.emittedStockPrice, args.symbol)
    deleteFromEmitted(conn, schema.emittedStockVolume, args.symbol)
def _purgeTweetsSlatedForDeletion(limit):
  """ Purge tweets that are slated for deletion as indicated by entries in the
  schema.twitterDeletion table

  :param limit: max records to purge per call
  :returns: a sequence of id's of deleted tweets
  """
  twitterTweetsSchema = collectorsdb.schema.twitterTweets
  twitterDeletionSchema = collectorsdb.schema.twitterDeletion


  # NOTE: we first query the row id's to delete, so we can return them for
  # accountability and debugging
  rowsToDeleteSel = sqlalchemy.select([twitterTweetsSchema.c.uid]).where(
    twitterTweetsSchema.c.uid.in_(
      sqlalchemy.select([twitterDeletionSchema.c.tweet_uid]))).limit(limit)

  numDeleted = 0
  with collectorsdb.engineFactory().begin() as conn:
    rowIdsToDelete = tuple(
      str(row[0]) for row in conn.execute(rowsToDeleteSel).fetchall()
    )

    if rowIdsToDelete:
      tweetDeletion = twitterTweetsSchema.delete(  # pylint: disable=E1120
        ).where(twitterTweetsSchema.c.uid.in_(rowIdsToDelete))

      numDeleted = conn.execute(tweetDeletion).rowcount

  if len(rowIdsToDelete) != numDeleted:
    g_log.error("Expected to delete %d tweets, but actually deleted %d tweets",
                len(rowIdsToDelete), numDeleted)

  return rowIdsToDelete
def _deleteSecurity(symbol):
  """Delete security from xignite_security table"""
  with collectorsdb.engineFactory().begin() as conn:
    conn.execute(
      schema.xigniteSecurity  # pylint: disable=E1120
      .delete()
      .where(schema.xigniteSecurity.c.symbol == symbol))
def updateLastEmittedSampleDatetime(key, sampleDatetime):
  """ Update the last emitted sample timestamp value in the database for the
  Tweet Volume metrics

  :param str key: caller's key in schema.emittedSampleTracker
  :param datetime sampleDatetime: UTC datetime of last successfully-emitted
    sample batch
  """
  update = schema.emittedSampleTracker.update(  # pylint: disable=E1120
    ).values(
      sample_ts=sampleDatetime
    ).where(
      (schema.emittedSampleTracker.c.key == key)
    )

  collectorsdb.engineFactory().execute(update)
    def testPurgeOldTweets(self):

        gcThresholdDays = 90

        now = datetime.utcnow()

        oldRows = [
            dict(uid=uuid.uuid1().hex,
                 created_at=now - timedelta(days=gcThresholdDays + 1),
                 retweet=False,
                 lang="en-us"),
            dict(uid=uuid.uuid1().hex,
                 created_at=now - timedelta(days=gcThresholdDays + 2),
                 retweet=False,
                 lang="en-us"),
        ]

        youngRows = [
            dict(uid=uuid.uuid1().hex,
                 created_at=now,
                 retweet=False,
                 lang="en-us"),
            dict(uid=uuid.uuid1().hex,
                 created_at=now - timedelta(days=gcThresholdDays - 1),
                 retweet=False,
                 lang="en-us"),
            dict(uid=uuid.uuid1().hex,
                 created_at=now - timedelta(days=gcThresholdDays - 2),
                 retweet=False,
                 lang="en-us"),
        ]

        allRows = oldRows + youngRows

        # Patch collectorsdb config to use a temporary database
        with collectorsdb_test_utils.ManagedTempRepository("purgetweets"):
            engine = collectorsdb.engineFactory()

            numInserted = engine.execute(
                schema.twitterTweets.insert(),  # pylint: disable=E1120
                allRows).rowcount

            self.assertEqual(numInserted, len(allRows))

            # Execute
            numDeleted = purge_old_tweets.purgeOldTweets(gcThresholdDays)

            # Verify

            self.assertEqual(numDeleted, len(oldRows))

            # Verify that only the old tweets got purged
            remainingRows = engine.execute(
                sql.select([schema.twitterTweets.c.uid])).fetchall()

            self.assertEqual(len(remainingRows), len(youngRows))

            self.assertItemsEqual([row["uid"] for row in youngRows],
                                  [row.uid for row in remainingRows])  # pylint: disable=E1101
  def testTransientErrorRetryDecorator(self):
    # Setup proxy.  We'll patch config later, so we need to cache the values
    # so that the original proxy may be restarted with the original params
    config = collectorsdb.CollectorsDbConfig()

    originalHost = config.get("repository", "host")
    originalPort = config.getint("repository", "port")

    def _startProxy():
      p = startProxy(originalHost, originalPort, 6033)
      p.next()
      return p

    proxy = _startProxy()
    self.addCleanup(proxy.send, "kill")

    # Patch collectorsdb config with local proxy
    with ConfigAttributePatch(config.CONFIG_NAME,
                              config.baseConfigDir,
                              (("repository", "host", "127.0.0.1"),
                               ("repository", "port", "6033"))):

      # Force refresh of engine singleton
      collectorsdb.resetEngineSingleton()
      engine = collectorsdb.engineFactory()

      # First, make sure valid query returns expected results
      res = collectorsdb.retryOnTransientErrors(engine.execute)("select 1")
      self.assertEqual(res.scalar(), 1)

      @collectorsdb.retryOnTransientErrors
      def _killProxyTryRestartProxyAndTryAgain(n=[]):  # pylint: disable=W0102
        if not n:
          # Kill the proxy on first attempt
          proxy.send("kill")
          proxy.next()
          try:
            engine.execute("select 1")
            self.fail("Proxy did not terminate as expected...")
          except sqlalchemy.exc.OperationalError:
            pass
          n.append(None)
        elif len(n) == 1:
          # Restore proxy in second attempt
          newProxy = _startProxy()
          self.addCleanup(newProxy.send, "kill")
          n.append(None)

        res = engine.execute("select 2")

        return res

      # Try again w/ retry decorator
      result = _killProxyTryRestartProxyAndTryAgain()

      # Verify that the expected value is eventually returned
      self.assertEqual(result.scalar(), 2)
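
# The retryOnTransientErrors decorator exercised above lives in collectorsdb
# and is not reproduced in these examples. A minimal, hypothetical sketch of
# the idea the test relies on -- retry the wrapped call when a transient
# sqlalchemy.exc.OperationalError is raised -- might look like this; the retry
# policy is illustrative only and is not the project's actual implementation.
import functools
import time

import sqlalchemy.exc


def retryOnTransientErrorsSketch(func, _retries=5, _delaySec=0.5):
  @functools.wraps(func)
  def wrapper(*args, **kwargs):
    for attempt in range(_retries):
      try:
        return func(*args, **kwargs)
      except sqlalchemy.exc.OperationalError:
        if attempt == _retries - 1:
          raise
        time.sleep(_delaySec)
  return wrapper
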
def queryLastEmittedSampleDatetime(key):
  """
  :param str key: caller's key in schema.emittedSampleTracker
  :returns: UTC timestamp of the last successfully-emitted sample batch; None if
    one hasn't been set up yet; see establishLastEmittedSampleDatetime
  :rtype: datetime.datetime if not None
  """
  sel = sql.select([schema.emittedSampleTracker.c.sample_ts]).where(
    schema.emittedSampleTracker.c.key == key)

  return collectorsdb.engineFactory().execute(sel).scalar()
def queryLastEmittedNonMetricSequence(key):
  """
  :param str key: caller's key in schema.emittedNonMetricTracker
  :returns: last emitted sequence number for non-metric source; None if one
    hasn't been saved yet.
  :rtype: int if not None
  """
  sel = sql.select([schema.emittedNonMetricTracker.c.last_seq]).where(
    schema.emittedNonMetricTracker.c.key == key)

  return collectorsdb.engineFactory().execute(sel).scalar()
def _clearUnknownSymbols():
  """
  Remove all rows from the company_symbol_failures table.
  """

  result = collectorsdb.engineFactory().execute(
    schema.companySymbolFailures.delete())  # pylint: disable=E1120

  if result.rowcount:
    g_log.info("Deleted %s rows from %s table",
               result.rowcount, schema.companySymbolFailures)
def _queryCachedCompanySymbols():
  """Get the cached security symbols from the xignite_security table

  :returns: A sequence of stock symbols from the xignite_security table
  :rtype: sequence
  """
  engine = collectorsdb.engineFactory()

  return tuple(
    row.symbol for row in
    engine.execute(sql.select([schema.xigniteSecurity.c.symbol])).fetchall())
def _deleteScreenNameFailures():
  """
  Clear rows from the twitter_handle_failures table.
  """

  result = collectorsdb.engineFactory().execute(
    collectorsdb.schema.twitterHandleFailures.delete())  # pylint: disable=E1120

  if result.rowcount:
    g_log.info("Deleted %s rows from %s table",
               result.rowcount, collectorsdb.schema.twitterHandleFailures)
    def securityExists(symbol):
      security = collectorsdb.engineFactory().execute(
        sql.select([schema.xigniteSecurity.c.symbol])
        .where(schema.xigniteSecurity.c.symbol == symbol)
      ).scalar()

      if security is not None:
        self.assertEqual(security, symbol)
        return True

      return False
def _unknownSymbolReported(symbol):
  """ Check if a specific company symbol already exists in the
  company_symbol_failures table.

  :param str symbol: symbol of the company's security (e.g., "AAPL")
  :returns: True, if symbol is already in the table. False, otherwise
  :rtype: bool
  """
  sel = schema.companySymbolFailures.select(
    ).where(schema.companySymbolFailures.c.symbol == symbol)
  rows = collectorsdb.engineFactory().execute(sel).fetchall()

  return len(rows) > 0
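
# A lighter-weight variant of the membership check above (illustrative sketch,
# not from the original module): since only a yes/no answer is needed, an
# EXISTS query avoids fetching the matching rows.
def _unknownSymbolReportedViaExists(symbol):
  sel = sql.select(
    [sql.exists().where(schema.companySymbolFailures.c.symbol == symbol)])
  return bool(collectorsdb.engineFactory().execute(sel).scalar())
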
  def testEngineFactorySingletonPattern(self, sqlalchemyMock):

    # Explicitly spec out sqlalchemy.create_engine()
    firstCall = Mock(spec_set=sqlalchemy.engine.base.Engine)
    sqlalchemyMock.create_engine.side_effect = [firstCall]

    # Call collectorsdb.engineFactory()
    engine = collectorsdb.engineFactory()
    self.assertIs(engine, firstCall)

    # Call collectorsdb.engineFactory() again and assert singleton
    engine2 = collectorsdb.engineFactory()
    self.assertIs(engine2, firstCall)
    self.assertEqual(sqlalchemyMock.create_engine.call_count, 1)

    # Call collectorsdb.engineFactory() in different process, assert raises
    # assertion error
    with patch("taurus_metric_collectors.collectorsdb.os.getpid",
               return_value=collectorsdb._EngineSingleton._pid + 1,
               autospec=True):
      with self.assertRaises(AssertionError):
        collectorsdb.engineFactory()
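
# The mocked test above hints at how the per-process guard works: the engine
# singleton remembers the pid of the process that created it, and
# engineFactory() asserts that the caller is still in that process. A minimal,
# hypothetical sketch of that pattern (not the actual collectorsdb
# implementation):
import os

import sqlalchemy


class _EngineSingletonSketch(object):
  _engine = None
  _pid = None

  @classmethod
  def getEngine(cls, dsn):
    if cls._engine is None:
      cls._engine = sqlalchemy.create_engine(dsn)
      cls._pid = os.getpid()
    assert cls._pid == os.getpid(), (
      "collectorsdb engine must not be shared across processes")
    return cls._engine
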
def _screenNameFailureReported(screenName):
  """ Check if a specific twitter handle already exists in the
  twitter_handle_failures table.

  :param screenName: twitter handle
  :type screenName: string
  :returns: True, if twitter handle is already in the table. False, otherwise
  :rtype: Boolean
  """
  table = collectorsdb.schema.twitterHandleFailures

  sel = (table.select().where(table.c.handle == screenName))
  rows = collectorsdb.engineFactory().execute(sel)

  return rows.rowcount != 0
  def testEmittedNonMetricSequence(self):
    key = "bogus-test-key"

    metric_utils.updateLastEmittedNonMetricSequence(key, 1)

    # Cleanup
    self.addCleanup(
      collectorsdb.engineFactory().execute,
      schema.emittedNonMetricTracker.delete().where(  # pylint: disable=E1120
        (schema.emittedNonMetricTracker.c.key == key)
      )
    )

    lastEmittedSample = metric_utils.queryLastEmittedNonMetricSequence(key)

    self.assertEqual(1, lastEmittedSample)
    def addSecurity(symbol):
      self.addCleanup(_deleteSecurity, symbol)
      xignite_agent_utils.insertSecurity(
        engine=collectorsdb.engineFactory(),
        xigniteSecurity={
          "Symbol": symbol,
          "CIK": "CIK",
          "CUSIP": "CUSIP",
          "ISIN": "ISIN",
          "Valoren": "Valoren",
          "Name": "{sym} Inc.".format(sym=symbol),
          "Market": "Market",
          "MarketIdentificationCode": "mic1",
          "MostLiquidExchange": True,
          "CategoryOrIndustry": "CategoryOrIndustry"
        })

      self.assertTrue(securityExists(symbol),
                      "inserted {symbol} not found".format(symbol=symbol))
def poll(metricSpecs, apitoken, barlength, days):
  """ Poll XIgnite data for given metricspecs associated with the same symbol,
  returning only new data relative to previously fetched data

  :param metricSpecs: Sequence of one or more StockMetricSpec objects
    associated with the same stock symbol for which to conduct polling
  :param apitoken: XIgnite API Token
  :param barlength: Aggregation time period (in minutes)
  :param days: Number of days to request
  :type days: int
  :returns: security details (dict), and new data as a sequence of dicts
  :rtype: 2-tuple
  """
  try:
    symbol = metricSpecs[0].symbol

    now = datetime.datetime.now(_UTC_TZ)  # Now, in UTC time
    # Align and pad the end time to prevent too recent of a bucket from being
    # returned
    now -= datetime.timedelta(minutes=(barlength + now.minute % barlength),
                              seconds=now.second)

    engine = collectorsdb.engineFactory()
    lastSample = _getLatestSample(engine, symbol)

    if lastSample:
      localizedLastEndTime = (
        getEasternLocalizedEndTimestampFromSampleRow(lastSample))

    else:
      # Need to bootstrap from existing file-based .history/ approach
      symbolFilename = getSymbolFilename(symbol)
      if os.path.isfile(symbolFilename):
        # TODO: TAUR-779 Remove this case once we've successfully migrated
        # away from file-based approach.
        with open(symbolFilename, "r+") as symbolFile:
          try:
            # Seek to end of file for latest sample
            lastline = StringIO(symbolFile.readlines()[-1])
            csvin = csv.reader(lastline)
            lastSample = dict(zip(_COLS, next(csvin)))
            localizedLastEndTime = (
              getEasternLocalizedTimestampFromSample(lastSample["EndDate"],
                                                     lastSample["EndTime"],
                                                     lastSample["UTCOffset"]))

          except IndexError:
            # File is empty
            lastSample = {}
            localizedLastEndTime = (
              ((now - datetime.timedelta(days=days)).astimezone(_EASTERN_TZ)))
      else:
        localizedLastEndTime = (
          ((now - datetime.timedelta(days=days)).astimezone(_EASTERN_TZ)))


    # Set start time to match last end, and end to be now
    # Use Eastern because that's what XIgnite assumes in the API
    localizedStartTime = localizedLastEndTime
    localizedEndTime = now.astimezone(_EASTERN_TZ)

    # Fetch XIgnite data

    fields = ["Outcome",
              "Message",
              "Identity",
              "Delay",
              "Security",
              "Security.CIK",
              "Security.CUSIP",
              "Security.Symbol",
              "Security.ISIN",
              "Security.Valoren",
              "Security.Name",
              "Security.Market",
              "Security.MarketIdentificationCode",
              "Security.MostLiquidExchange",
              "Security.CategoryOrIndustry",
              "Bars",
              "Bars.StartDate",
              "Bars.StartTime",
              "Bars.EndDate",
              "Bars.EndTime",
              "Bars.UTCOffset",
              "Bars.Open",
              "Bars.High",
              "Bars.Low",
              "Bars.Trades"]

    for spec in metricSpecs:
      fields.append("Bars.%s" % (spec.sampleKey,))

    try:
      data = getData(symbol=symbol,
                     apitoken=apitoken,
                     barlength=barlength,
                     startTime=localizedStartTime.strftime(DATE_FMT),
                     endTime=localizedEndTime.strftime(DATE_FMT),
                     fields=fields)
    except Exception as e:
      _LOG.exception("Unexpected error while retrieving data from XIgnite.")
      return {"Symbol": symbol}, []

    if (data and "Bars" in data and "Outcome" in data and
        data["Outcome"] == "Success"):
      # Return only the new data


      def sampleStartIsGreaterThanOrEqualToLastEndTime(sample):
        # Compare w/ consistent timezones.
        return (
          getEasternLocalizedTimestampFromSample(sample["StartDate"],
                                                 sample["StartTime"],
                                                 sample["UTCOffset"])
          >= localizedLastEndTime)


      return (data["Security"],
              [sample for sample in data["Bars"]
               if (not lastSample or
                   sampleStartIsGreaterThanOrEqualToLastEndTime(sample))])

    else:
      return {"Symbol": symbol}, []
  except Exception:
    _LOG.exception("poll failed for metricSpecs=%s", metricSpecs)
    raise
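
# Worked example of the end-time alignment performed near the top of poll()
# (values are made up for illustration): with barlength=5 and
# now=12:37:42 UTC, now.minute % barlength == 2, so poll() subtracts
# (5 + 2) minutes and 42 seconds, yielding 12:30:00 -- it snaps back to the
# previous bar boundary and then pads by one extra full bar so that a
# still-open bucket is never requested.
import datetime

def _alignEndTimeExample():
  barlength = 5
  now = datetime.datetime(2015, 1, 2, 12, 37, 42)
  aligned = now - datetime.timedelta(
    minutes=(barlength + now.minute % barlength), seconds=now.second)
  assert aligned == datetime.datetime(2015, 1, 2, 12, 30, 0)
  return aligned
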
def purgeOldTweets(thresholdDays):
  """ Purge tweets from twitter_tweets table that are older than the given
  number of days.

  :param int thresholdDays: tweets older than this many days will be deleted

  :returns: number of rows that were deleted

  """
  twitterTweetsSchema = collectorsdb.schema.twitterTweets

  g_log.info("Estimating number of tweets in table=%s older than numDays=%s",
             twitterTweetsSchema, thresholdDays)

  sqlEngine = collectorsdb.engineFactory()

  selectionPredicate = (
    twitterTweetsSchema.c.created_at <
    sql.func.date_sub(sql.func.utc_timestamp(),
                      sql.text("INTERVAL {:d} DAY".format(thresholdDays)))
  )

  estimate = _estimateNumTweetsToDelete(sqlEngine, selectionPredicate)

  g_log.info("Number of candidate old tweets to purge: estimate=%s", estimate)

  if estimate == 0:
    return 0

  # NOTE: We'll be deleting in smaller batches to avoid "Lock wait timeout
  # exceeded".
  #
  # When the number of old rows is huge, if we try to delete them in a single
  # transaction, we get perpetually mired in the error "Lock wait timeout
  # exceeded; try restarting transaction". mysql/innodb provides the setting
  # `innodb_lock_wait_timeout` that may be overridden, but there isn't a good way
  # to estimate a value that guarantees success. Doing it in one transaction
  # also doesn't facilitate progress update, thus creating the perception that
  # the operation is "stuck".
  totalDeleted = 0

  while totalDeleted < estimate:
    # NOTE: we're dealing with a couple of issues here:
    #
    # 1. sqlalchemy core doesn't support LIMIT in delete statements, so we can't
    #    use delete directly with LIMIT and ORDER BY
    # 2. MySql (5.6.21) doesn't support LIMIT & IN subqueries: "This version of
    #    MySQL doesn't yet support 'LIMIT & IN/ALL/ANY/SOME subquery"
    #
    # So, we're going to stick with sqlalchemy, and break the operation into two
    # queries: get the candidate uids, then delete rows with those uids

    limit = min(_MAX_DELETE_BATCH_SIZE, estimate - totalDeleted)

    uids = _queryCandidateRows(sqlEngine=sqlEngine,
                               selectionPredicate=selectionPredicate,
                               limit=limit)

    if uids:
      numDeleted = _deleteRows(sqlEngine=sqlEngine, uids=uids)
    else:
      # This could happen if something else deleted tweets in our range, such
      # as the process_tweet_deletions service that services deletion requests
      # from Twitter.
      break

    totalDeleted += numDeleted

    g_log.info("Purged %s old tweets [%s of %s]", numDeleted, totalDeleted,
               estimate)


  g_log.info("Purged numRows=%s of estimated=%s old tweets from table=%s",
             totalDeleted, estimate, twitterTweetsSchema)

  return totalDeleted
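
# _queryCandidateRows and _deleteRows are not included in these examples.
# Based on how purgeOldTweets uses them, minimal sketches might look like the
# following (illustrative only; the real helpers may differ):
def _queryCandidateRowsSketch(sqlEngine, selectionPredicate, limit):
  """ Return up to `limit` uids of twitter_tweets rows matching the predicate
  """
  sel = (sql.select([collectorsdb.schema.twitterTweets.c.uid])
         .where(selectionPredicate)
         .limit(limit))
  return tuple(row[0] for row in sqlEngine.execute(sel).fetchall())


def _deleteRowsSketch(sqlEngine, uids):
  """ Delete twitter_tweets rows with the given uids; return the number deleted
  """
  deletion = (collectorsdb.schema.twitterTweets.delete()  # pylint: disable=E1120
              .where(collectorsdb.schema.twitterTweets.c.uid.in_(uids)))
  return sqlEngine.execute(deletion).rowcount
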
def migrate(metrics=None):
    """ Migrate _BACKLOG_DAYS worth of previously-collected tweets to the Taurus
  non-metric-data RabbitMQ exchange.

  :param metrics: optional sequence of metric names; if specified (not None),
    the migration will be limited to tweets corresponding to the given metric
    names.
  """
    startingTimestamp = datetime.utcnow() - timedelta(days=_BACKLOG_DAYS)

    sqlEngine = collectorsdb.engineFactory()

    # Retrieve the first twitter samples sequence number in the desired range
    # select min(seq) from twitter_tweet_samples where agg_ts >= startingTimestamp

    @collectorsdb.retryOnTransientErrors
    def queryMigrationRange():
        sel = (sql.select([
            sql.func.count(),
            sql.func.min(schema.twitterTweetSamples.c.seq),
            sql.func.max(schema.twitterTweetSamples.c.seq)
        ]).where(schema.twitterTweetSamples.c.agg_ts >= startingTimestamp))
        if metrics is not None:
            sel = sel.where(schema.twitterTweetSamples.c.metric.in_(metrics))

        return sqlEngine.execute(sel).first()

    totalNumItems, minSeq, maxSeq = queryMigrationRange()
    if totalNumItems == 0:
        g_log.info("Nothing forwarded: no tweet samples found since %s UTC",
                   startingTimestamp.isoformat())
        return

    if metrics is None:
        g_log.info(
            "Starting migration of tweets from %s UTC; totalNumItems=%d; "
            "minSeq=%s, maxSeq=%s", startingTimestamp.isoformat(),
            totalNumItems, minSeq, maxSeq)
    else:
        g_log.info(
            "Starting migration of tweets from %s UTC; totalNumItems=%d; "
            "minSeq=%s, maxSeq=%s; metrics=%s", startingTimestamp.isoformat(),
            totalNumItems, minSeq, maxSeq, metrics)

    # Retrieve and publish batches
    totalNumPublished = 0
    totalNumBatches = 0
    batchMinSeq = minSeq
    with MessageBusConnector() as messageBus:
        while True:
            batchEndSeq, batch = TweetForwarder.queryNonMetricTweetBatch(
                sqlEngine=sqlEngine,
                minSeq=batchMinSeq,
                maxItems=200,
                maxSeq=maxSeq,
                metrics=metrics)

            if batchEndSeq is None:
                break

            TweetForwarder.publishNonMetricTweetBatch(messageBus=messageBus,
                                                      batch=batch)

            totalNumPublished += len(batch)
            totalNumBatches += 1

            g_log.debug(
                "Published numItems=%d; batchMinSeq=%s; batchEndSeq=%s "
                "(%d of %d: %s%%)", len(batch), batchMinSeq, batchEndSeq,
                totalNumPublished, totalNumItems,
                int(float(totalNumPublished) / totalNumItems * 100))

            if (totalNumBatches %
                    250) == 0 or totalNumPublished == totalNumItems:
                # Progress report
                g_log.info("Published %d of %d: %s%%", totalNumPublished,
                           totalNumItems,
                           int(float(totalNumPublished) / totalNumItems * 100))

            # Prepare for next query
            batchMinSeq = batchEndSeq + 1

    g_log.info(
        "Done publishing! publishedBatches=%d, publishedItems=%d, "
        "expectedItems=%d; minSeq=%s, maxSeq=%s", totalNumBatches,
        totalNumPublished, totalNumItems, minSeq, maxSeq)
  def testCollectorsdbIsAccessible(self):  # pylint: disable=R0201
    transactionContext = collectorsdb.engineFactory().begin()
    transactionContext.transaction.rollback()
def forward(metricSpecs, data, security, server=DEFAULT_SERVER,
            port=DEFAULT_PORT,
            dryrun=DEFAULT_DRYRUN):
  """ Forward stock data to HTM-IT/Taurus instance via custom metric

  :param metricSpecs: Sequence of one or more StockMetricSpec objects associated
    with the same stock symbol for which polling was conducted
  :param list data: List of sample dicts
  :param dict security: Details of security from XIgnite API
  """
  try:
    symbol = security["Symbol"]

    engine = collectorsdb.engineFactory()

    lastSample = _getLatestSample(engine, symbol)
    if lastSample:
      localizedLastEndTime = (
        getEasternLocalizedEndTimestampFromSampleRow(lastSample))
    else:
      localizedLastEndTime = None

    # Implemented in two phases:
    #
    # 1. Buffer records to collectorsdb

    for sample in data:
      localizedSampleStartTime = (
        getEasternLocalizedTimestampFromSample(sample["StartDate"],
                                               sample["StartTime"],
                                               sample["UTCOffset"]))

      if localizedSampleStartTime.time() < NAIVE_MARKET_OPEN_TIME:
        # Ignore samples that precede market open
        _LOG.info("Skipping data before market hours: %s @ %s sample=%s",
                  symbol, localizedSampleStartTime, sample)
        continue

      if localizedSampleStartTime.time() >= NAIVE_MARKET_CLOSE_TIME:
        # Ignore a quirk of the xignite API that duplicates some data at
        # end of trading day. This also excludes the closing auction on
        # NYSE.
        _LOG.info("Skipping data after market hours: %s @ %s sample=%s",
                  symbol, localizedSampleStartTime, sample)
        continue

      if not lastSample or (localizedSampleStartTime >= localizedLastEndTime):
        # Current sample starts at, or after last recorded timestamp ends
        localizedSampleEndTime = (
          getEasternLocalizedTimestampFromSample(sample["EndDate"],
                                                 sample["EndTime"],
                                                 sample["UTCOffset"]))

        ins = (xigniteSecurityBars
               .insert()
               .values(symbol=symbol,
                       StartDate=localizedSampleStartTime.date(),
                       StartTime=localizedSampleStartTime.time(),
                       EndDate=localizedSampleEndTime.date(),
                       EndTime=localizedSampleEndTime.time(),
                       UTCOffset=sample["UTCOffset"],
                       Open=sample["Open"],
                       High=sample["High"],
                       Low=sample["Low"],
                       Close=sample["Close"],
                       Volume=sample["Volume"],
                       Trades=sample["Trades"]))

        @collectorsdb.retryOnTransientErrors
        def _insertBar():
          engine.execute(ins)

        try:
          _insertBar()
        except IntegrityError:
          # Most likely foreign key constraint violation against the
          # xignite_security table
          _LOG.info("Inserting security row for symbol=%s", symbol)
          xignite_agent_utils.insertSecurity(engine, security)

          # Re-insert after resolving IntegrityError
          _insertBar()

    #  2. If in active mode, send ALL un-sent records to Taurus

    if g_opMode != ApplicationConfig.OP_MODE_ACTIVE:
      return

    transmitMetricData(metricSpecs=metricSpecs, symbol=symbol, engine=engine)

  except Exception:
    _LOG.exception("forward failed for metricSpecs=%s", metricSpecs)
    raise
  def runQueryWithRetries():
    return collectorsdb.engineFactory().execute(cleanupQuery).rowcount
def deleteCompanies(tickerSymbols,
                    engineServer,
                    engineApiKey,
                    warnAboutDestructiveAction=True,
                    warningTimeout=_DEFAULT_WARNING_PROMPT_TIMEOUT_SEC):
  """Delete companies from Taurus Collector and their metrics/models from
  Taurus Engine.

  :param sequence tickerSymbols: stock ticker symbols of companies to be
    deleted

  :param str engineServer: DNS name or IP address of Taurus API server

  :param str engineApiKey: API Key of Taurus HTM Engine

  :param bool warnAboutDestructiveAction: whether to warn about destructive
    action; defaults to True.

  :param float warningTimeout: Timeout for the warning prompt; ignored if
    warnAboutDestructiveAction is False

  :raises WarningPromptTimeout: if warning prompt timed out
  :raises UserAbortedOperation: if user chose to abort the operation
  :raises FlusherMetricNotFound:
  """
  tickerSymbols = tuple(symbol.upper() for symbol in tickerSymbols)

  # Check for duplicate symbols
  repeatedSymbols = set(sym for sym in tickerSymbols
                        if tickerSymbols.count(sym) > 1)
  if repeatedSymbols:
    raise ValueError("{numRepeats} symbol(s) are present more than once in "
                     "tickerSymbols arg: {repeats}"
                     .format(numRepeats=len(repeatedSymbols),
                             repeats=repeatedSymbols))

  # Set will be handier going forward
  tickerSymbols = set(tickerSymbols)

  if warnAboutDestructiveAction:
    _warnAboutDestructiveAction(timeout=warningTimeout,
                                tickerSymbols=tickerSymbols,
                                engineServer=engineServer)

  # If any of the ticker symbols still appear in the collector's metrics config,
  # abort the operation as a precautionary measure.
  allSymbols = set(security[0].upper() for security in
                   metric_utils.getAllMetricSecurities())

  problemSymbols = tickerSymbols & allSymbols
  assert not problemSymbols, (
    "Can't delete - {numProblem} of the specified companies [{symbols}] are "
    "in active metrics configuration".format(numProblem=len(problemSymbols),
                                             symbols=problemSymbols))

  # First, we need to synchronize with Taurus Engine's metric data path.
  # If any of the data still in the pipeline is for any of the companies being
  # deleted, then the metrics may be re-created in the Engine after we delete
  # them. This is a yet-unresolved subtlety with custom metrics in htmengine.
  _flushTaurusEngineMetricDataPath(engineServer, engineApiKey)

  # NOTE: We must query custom metrics after flushing the metric data path,
  # since metrics may get created as a side-effect of processing metric data.
  allMetricsMap = {
    obj["name"] : obj
    for obj in
    metric_utils.getAllCustomMetrics(host=engineServer, apiKey=engineApiKey)
  }

  allMetricNames = allMetricsMap.keys()

  for symbolNum, symbol in enumerate(tickerSymbols, 1):
    # Delete corresponding metrics from Taurus Engine
    metricNamesToDelete = metric_utils.filterCompanyMetricNamesBySymbol(
      allMetricNames,
      symbol)
    if not metricNamesToDelete:
      g_log.info("No metrics to delete for symbol=%s (%d of %d)", symbol,
                 symbolNum, len(tickerSymbols))
      continue

    g_log.info("Deleting metrics and models for ticker symbol=%s from Taurus "
               "Engine=%s (%d of %d)", symbol, engineServer,
               symbolNum, len(tickerSymbols))

    for metricName in metricNamesToDelete:
      metric_utils.deleteMetric(host=engineServer,
                                apiKey=engineApiKey,
                                metricName=metricName)
      g_log.info("Deleted metric name=%s, uid=%s", metricName,
                 allMetricsMap[metricName]["uid"])


    # Delete the symbol from xignite_security table last; this cascades to
    # delete related rows in other tables via cascading delete relationship.
    #
    # NOTE: garbage collection from other tables not tied to xignite_security
    #  symbols presently depends on aging of the rows (e.g., twitter tables).
    #  After ENG-83, all company-specific rows from all tables will be
    # cleaned up and THIS NOTE SHOULD THEN BE REMOVED
    with collectorsdb.engineFactory().begin() as conn:
      numDeleted = (
        conn.execute(
          collectorsdb.schema.xigniteSecurity  # pylint: disable=E1120
          .delete()
          .where(collectorsdb.schema.xigniteSecurity.c.symbol == symbol))
        ).rowcount

      if numDeleted:
        g_log.info("Deleted row=%s from table=%s", symbol,
                   collectorsdb.schema.xigniteSecurity)
      else:
        g_log.warning(
          "Couldn't delete security row=%s: not found in table=%s",
          symbol, collectorsdb.schema.xigniteSecurity)