Example #1
def main(args):  # pragma: no cover
    opts = parse_args(args)
    commits_counter = ts_mon.CounterMetric('gsubtreed/commit_count')
    cref = gsubtreed.GsubtreedConfigRef(opts.repo)
    opts.repo.reify()

    summary = collections.defaultdict(int)

    def outer_loop_iteration():
        success, paths_counts = gsubtreed.inner_loop(opts.repo, cref)
        for path, count in paths_counts.iteritems():
            summary[path] += count
            commits_counter.increment_by(count, fields={'path': path})
        return success

    loop_results = outer_loop.loop(task=outer_loop_iteration,
                                   sleep_timeout=lambda: cref['interval'],
                                   **opts.loop_opts)

    if opts.json_output:
        with open(opts.json_output, 'w') as f:
            json.dump(
                {
                    'error_count': loop_results.error_count,
                    'summary': summary,
                }, f)

    return 0 if loop_results.success else 1
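
Example #14 below shows the fully field-annotated declaration of this same
gsubtreed/commit_count metric; here the counter is constructed with only a
name. A minimal sketch of the increment pattern, with an illustrative metric
name and path values (not part of the original code):

from infra_libs import ts_mon

# Illustrative declaration: a counter with one string field.
demo_counter = ts_mon.CounterMetric(
    'demo/commit_count', 'Commits processed, by subtree path.',
    [ts_mon.StringField('path')])

# increment_by() adds a delta under a specific combination of field values,
# so each distinct path becomes its own time series.
demo_counter.increment_by(3, fields={'path': 'chrome/common'})
demo_counter.increment_by(1, fields={'path': 'v8'})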
Example #2
class ClientMonitor(jsonfeed.JsonFeed):
    """JSON feed to track client side js errors in ts_mon."""

    js_errors = ts_mon.CounterMetric(
        'frontend/js_errors', 'Number of uncaught client-side JS errors.',
        None)

    def HandleRequest(self, mr):
        """Build up a dictionary of data values to use when rendering the page.

    Args:
      mr: commonly used info parsed from the request.

    Returns:
      Dict of values used by EZT for rendering the page.
    """

        post_data = mr.request.POST
        errors = post_data.get('errors')
        try:
            errors = json.loads(errors)

            total_errors = 0
            for error_key in errors:
                total_errors += errors[error_key]
            logging.error('client monitor report (%d): %s', total_errors,
                          post_data.get('errors'))
            self.js_errors.increment_by(total_errors)
        except Exception as e:
            logging.error('Problem processing client monitor report: %r', e)

        return {}
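
A quick sketch of the payload shape HandleRequest expects: a JSON object
mapping an error signature to an occurrence count, whose values are summed
before the single increment_by() call. The signatures below are invented:

import json

post_errors = '{"TypeError: a is undefined": 3, "ReferenceError: b": 1}'

errors = json.loads(post_errors)
total_errors = sum(errors.itervalues())  # 4 (Python 2, as in the example)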
Example #3
class ApiRateLimiter(RateLimiter):

    blocked_requests = ts_mon.CounterMetric(
        'monorail/apiratelimiter/blocked_request',
        'Count of requests that exceeded the rate limit and were blocked.',
        None)
    limit_exceeded = ts_mon.CounterMetric(
        'monorail/apiratelimiter/rate_exceeded',
        'Count of requests that exceeded the rate limit.', None)
    cost_thresh_exceeded = ts_mon.CounterMetric(
        'monorail/apiratelimiter/cost_thresh_exceeded',
        'Count of requests that were expensive to process', None)
    checks = ts_mon.CounterMetric(
        'monorail/apiratelimiter/check',
        'Count of checks done, by fail/success type.',
        [ts_mon.StringField('type')])

    #pylint: disable=arguments-differ
    def CheckStart(self, client_id, client_email, now=None):
        if now is None:
            now = time.time()

        keysets = _CreateApiCacheKeys(client_id, client_email, now)
        qpm_limit = client_config_svc.GetQPMDict().get(client_email,
                                                       DEFAULT_API_QPM)
        window_limit = qpm_limit * N_MINUTES
        self._AuxCheckStart(keysets, window_limit,
                            settings.api_ratelimiting_enabled,
                            ApiRateLimitExceeded(client_id, client_email))

    #pylint: disable=arguments-differ
    def CheckEnd(self, client_id, client_email, now, start_time):
        if not settings.ratelimiting_cost_enabled:
            return

        elapsed_ms = (now - start_time) * 1000

        if elapsed_ms < settings.api_ratelimiting_cost_thresh_ms:
            return

        keysets = _CreateApiCacheKeys(client_id, client_email, start_time)
        self._AuxCheckEnd(
            keysets, 'API Rate Limit Cost Threshold Exceeded: %s, %s' %
            (client_id, client_email), settings.api_ratelimiting_cost_penalty)
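
_CreateApiCacheKeys is not part of this excerpt. A hypothetical sketch of what
such a helper could look like, bucketing counters per minute so that a sliding
N_MINUTES window can be summed; the key format is an assumption, not the real
implementation:

N_MINUTES = 5  # assumed window size


def _CreateApiCacheKeysSketch(client_id, client_email, now):
    # One memcache key per minute bucket and per identity; summing a keyset
    # approximates the requests made over the last N_MINUTES.
    minute = int(now // 60)
    return [
        ['api_rate_%s_%d' % (client_id, minute - i)
         for i in range(N_MINUTES)],
        ['api_rate_%s_%d' % (client_email, minute - i)
         for i in range(N_MINUTES)],
    ]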
Example #4
class Poller(threading.Thread):

    commits_metric = ts_mon.CounterMetric(
        'bugdroid/commits', 'Counter of commits processed by bugdroid', [
            ts_mon.StringField('poller'),
            ts_mon.StringField('project'),
            ts_mon.StringField('status')
        ])

    def __init__(self,
                 interval_in_minutes=15,
                 setup_refresh_interval_minutes=0,
                 run_once=False):
        threading.Thread.__init__(self, name=str(hash(self)))
        self.interval = interval_in_minutes * 60
        self.refresh_interval = setup_refresh_interval_minutes
        self.run_once = run_once

        if setup_refresh_interval_minutes:
            self.setup_refresh = (
                datetime.datetime.now() +
                datetime.timedelta(minutes=setup_refresh_interval_minutes))
        else:
            self.setup_refresh = None

    def execute(self):
        raise NotImplementedError()

    def setup(self):  # pylint: disable=R0201
        return True

    def run(self):
        try:
            while True:
                if (self.setup_refresh
                        and self.setup_refresh < datetime.datetime.now()):
                    LOGGER.info('Re-running Poller setup')
                    self.setup()
                    self.setup_refresh = (
                        datetime.datetime.now() +
                        datetime.timedelta(minutes=self.refresh_interval))

                self.execute()

                if self.run_once:
                    return

                time.sleep(self.interval)
        except Exception:
            LOGGER.exception('Unhandled Poller exception.')

    def start(self):
        if self.setup():
            super(Poller, self).start()
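
A hedged sketch of how a concrete poller might report into commits_metric; the
subclass and field values are invented for illustration:

class DemoPoller(Poller):
    """Illustrative subclass only; not part of bugdroid."""

    def execute(self):
        # Pretend one commit was processed successfully.
        self.commits_metric.increment({
            'poller': 'demo',
            'project': 'demo-project',
            'status': 'success',
        })


DemoPoller(run_once=True).start()  # run() executes once, then returns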
Example #5
def main(args):  # pragma: no cover
    opts = parse_args(args)
    commits_counter = ts_mon.CounterMetric('gnumbd/commit_count')
    cref = gnumbd.GnumbdConfigRef(opts.repo)
    opts.repo.reify()

    all_commits = []

    def outer_loop_iteration():
        success, commits = gnumbd.inner_loop(opts.repo, cref)
        all_commits.extend(commits)
        commits_counter.increment_by(len(commits))
        return success

    # TODO(iannucci): sleep_timeout should be an exponential backon/off.
    #   Whenever we push, we should decrease the interval at 'backon_rate'
    #   until we hit 'min_interval'.
    #   Whenever we fail/NOP, we should back off at 'backoff_rate' until we
    #   hit 'max_interval'.
    #
    #   When all is going well, this should be looping at < 1 sec. If things
    #   start going sideways, we should automatically back off.
    loop_results = outer_loop.loop(task=outer_loop_iteration,
                                   sleep_timeout=lambda: cref['interval'],
                                   **opts.loop_opts)

    if opts.json_output:
        with open(opts.json_output, 'w') as f:
            json.dump(
                {
                    'error_count': loop_results.error_count,
                    'synthesized_commits': [{
                        'commit': c.hsh,
                        'footers': infra_types.thaw(c.data.footers),
                    } for c in all_commits],
                }, f)

    return 0 if loop_results.success else 1
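
The TODO above asks for an adaptive sleep interval. A rough sketch of such a
backon/backoff timeout, with all rates and bounds invented for illustration;
success() and failure() would have to be called from the task wrapper:

class BackoffTimeout(object):
    """Illustrative only: shrinks the interval on success, grows it on failure."""

    def __init__(self, min_interval=0.5, max_interval=60.0,
                 backoff_rate=2.0, backon_rate=0.5):
        self.interval = min_interval
        self.min_interval = min_interval
        self.max_interval = max_interval
        self.backoff_rate = backoff_rate
        self.backon_rate = backon_rate

    def success(self):
        self.interval = max(self.min_interval,
                            self.interval * self.backon_rate)

    def failure(self):
        self.interval = min(self.max_interval,
                            self.interval * self.backoff_rate)

    def __call__(self):
        return self.interval

Passing an instance as sleep_timeout=BackoffTimeout() fits the existing
outer_loop.loop() signature, since the timeout is already a callable.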
Example #6
class RateLimiter(object):

    blocked_requests = ts_mon.CounterMetric(
        'monorail/ratelimiter/blocked_request',
        'Count of requests that exceeded the rate limit and were blocked.',
        None)
    limit_exceeded = ts_mon.CounterMetric(
        'monorail/ratelimiter/rate_exceeded',
        'Count of requests that exceeded the rate limit.', None)
    cost_thresh_exceeded = ts_mon.CounterMetric(
        'monorail/ratelimiter/cost_thresh_exceeded',
        'Count of requests that were expensive to process', None)
    checks = ts_mon.CounterMetric(
        'monorail/ratelimiter/check',
        'Count of checks done, by fail/success type.',
        [ts_mon.StringField('type')])

    def __init__(self, _cache=memcache, fail_open=True, **_kwargs):
        self.fail_open = fail_open

    def CheckStart(self, request, now=None):
        if (modules.get_current_module_name() not in MODULE_WHITELIST
                or users.is_current_user_admin()):
            return
        logging.info('X-AppEngine-Country: %s' %
                     request.headers.get(COUNTRY_HEADER, 'ZZ'))

        if now is None:
            now = time.time()

        keysets, country, ip, user_email = _CacheKeys(request, now)
        # There are either two or three sets of keys in keysets.
        # Three if the user's country is in COUNTRY_LIMITS, otherwise two.
        self._AuxCheckStart(
            keysets, COUNTRY_LIMITS.get(country, DEFAULT_LIMIT),
            settings.ratelimiting_enabled,
            RateLimitExceeded(country=country, ip=ip, user_email=user_email))

    def _AuxCheckStart(self, keysets, limit, ratelimiting_enabled,
                       exception_obj):
        for keys in keysets:
            count = 0
            try:
                counters = memcache.get_multi(keys)
                count = sum(counters.values())
                self.checks.increment({'type': 'success'})
            except Exception as e:
                logging.error(e)
                if not self.fail_open:
                    self.checks.increment({'type': 'fail_closed'})
                    raise exception_obj
                self.checks.increment({'type': 'fail_open'})

            if count > limit:
                # Since webapp2 won't let us return a 429 error code
                # <http://tools.ietf.org/html/rfc6585#section-4>, we can't
                # monitor rate limit exceeded events with our standard tools.
                # We return a 400 with a custom error message to the client,
                # and this logging is so we can monitor it internally.
                logging.info('%s, %d' % (exception_obj.message, count))

                self.limit_exceeded.increment()

                if ratelimiting_enabled:
                    self.blocked_requests.increment()
                    raise exception_obj

            k = keys[0]
            # Only update the latest *time* bucket for each prefix (reverse chron).
            memcache.add(k, 0, time=EXPIRE_AFTER_SECS)
            memcache.incr(k, initial_value=0)

    def CheckEnd(self, request, now, start_time):
        """If a request was expensive to process, charge some extra points
    against this set of buckets.
    We pass in both now and start_time so we can update the buckets
    based on keys created from start_time instead of now.
    now and start_time are float seconds.
    """
        if (modules.get_current_module_name() not in MODULE_WHITELIST
                or not settings.ratelimiting_cost_enabled):
            return

        elapsed_ms = (now - start_time) * 1000
        # Would it kill the python lib maintainers to have timedelta.total_ms()?
        if elapsed_ms < settings.ratelimiting_cost_thresh_ms:
            return

        # TODO: Look into caching the keys instead of generating them twice
        # for every request. Say, return them from CheckStart so they can
        # be passed back in here later.
        keysets, country, ip, user_email = _CacheKeys(request, start_time)

        self._AuxCheckEnd(
            keysets, 'Rate Limit Cost Threshold Exceeded: %s, %s, %s' %
            (country, ip, user_email), settings.ratelimiting_cost_penalty)

    def _AuxCheckEnd(self, keysets, log_str, ratelimiting_cost_penalty):
        self.cost_thresh_exceeded.increment()
        for keys in keysets:
            logging.info(log_str)

            # Only update the latest *time* bucket for each prefix (reverse chron).
            k = keys[0]
            memcache.add(k, 0, time=EXPIRE_AFTER_SECS)
            memcache.incr(k, delta=ratelimiting_cost_penalty, initial_value=0)
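
The add()-then-incr() pair used above is the standard App Engine idiom for a
racy-but-safe counter: add() creates the bucket with value 0 only if it does
not already exist, and incr() then increments atomically on the server. A
compressed sketch of the same idiom with an invented key and TTL:

from google.appengine.api import memcache

BUCKET_KEY = 'ratelimit_demo_bucket'  # illustrative key name
EXPIRE_AFTER_SECS = 6 * 60            # illustrative TTL

# add() is a no-op if the key already exists, so concurrent requests cannot
# reset an established counter; incr() is atomic.
memcache.add(BUCKET_KEY, 0, time=EXPIRE_AFTER_SECS)
count = memcache.incr(BUCKET_KEY, initial_value=0)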
Example #7
class EventMonUploader(webapp2.RequestHandler):
  num_test_results = ts_mon.CounterMetric(
      'test_results/num_test_results',
      'Number of reported test results',
      [ts_mon.StringField('result_type'),
       ts_mon.StringField('master'),
       ts_mon.StringField('builder'),
       ts_mon.StringField('test_type')])

  def post(self):
    if not self.request.body:
      logging.error('Missing request payload')
      self.response.set_status(400)
      return

    try:
      payload = json.loads(self.request.body)
    except ValueError:
      logging.error('Failed to parse request payload as JSON')
      self.response.set_status(400)
      return

    # Retrieve test json from datastore based on task parameters.
    master = payload.get('master')
    builder = payload.get('builder')
    build_number = payload.get('build_number')
    test_type = payload.get('test_type')
    step_name = payload.get('step_name')
    if (not master or not builder or build_number is None or not test_type or
        not step_name):
      logging.error(
          'Missing required parameters: (master=%s, builder=%s, '
          'build_number=%s, test_type=%s, step_name=%s)' %
          (master, builder, build_number, test_type, step_name))
      self.response.set_status(400)
      return

    files = TestFile.get_files(
        master, builder, test_type, build_number, 'full_results.json',
        load_data=True, limit=1)
    if not files:
      logging.error('Failed to find full_results.json for (%s, %s, %s, %s)' % (
                    master, builder, build_number, test_type))
      self.response.set_status(404)
      return
    file_json = JsonResults.load_json(files[0].data)

    # Create a proto event and send it to event_mon.
    event = event_mon.Event('POINT')
    test_results = event.proto.test_results
    test_results.master_name = master
    test_results.builder_name = builder
    test_results.build_number = int(build_number)
    test_results.test_type = test_type
    test_results.step_name = step_name
    if 'interrupted' in file_json:
      test_results.interrupted = file_json['interrupted']
    if 'version' in file_json:
      test_results.version = file_json['version']
    if 'seconds_since_epoch' in file_json:
      test_results.usec_since_epoch = long(
          float(file_json['seconds_since_epoch']) * 1000 * 1000)

    def convert_test_result_type(json_val):
      self.num_test_results.increment({
          'result_type': json_val, 'master': master, 'builder': builder,
          'test_type': test_type})
      try:
        return (event_mon.protos.chrome_infra_log_pb2.TestResultsEvent.
                TestResultType.Value(json_val.upper().replace('+', '_')))
      except ValueError:
        return event_mon.protos.chrome_infra_log_pb2.TestResultsEvent.UNKNOWN

    tests = util.flatten_tests_trie(
        file_json.get('tests', {}), file_json.get('path_delimiter', '/'))
    for name, test in tests.iteritems():
      test_result = test_results.tests.add()
      test_result.test_name = name
      test_result.actual.extend(
          convert_test_result_type(res) for res in test['actual'])
      test_result.expected.extend(
          convert_test_result_type(res) for res in test['expected'])

    event.send()
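
util.flatten_tests_trie is not shown in this excerpt. A hypothetical sketch of
the transformation it performs, turning the nested 'tests' trie from
full_results.json into delimiter-joined flat names; the leaf heuristic and the
space-separated result strings are assumptions based on how the handler
consumes the output:

def flatten_tests_trie_sketch(trie, delimiter='/'):
    # Assumed leaf shape: a dict with 'actual'/'expected' result strings,
    # e.g. {'actual': 'FAIL PASS', 'expected': 'PASS'}.
    flat = {}
    for name, node in trie.iteritems():
        if 'actual' in node and 'expected' in node:
            flat[name] = {
                'actual': node['actual'].split(),
                'expected': node['expected'].split(),
            }
        else:
            for sub, leaf in flatten_tests_trie_sketch(
                    node, delimiter).iteritems():
                flat[name + delimiter + sub] = leaf
    return flat

# {'fast': {'dom': {'actual': 'PASS', 'expected': 'PASS'}}}
# would flatten to {'fast/dom': {'actual': ['PASS'], 'expected': ['PASS']}}.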
Example #8
from tracker import tracker_views

from infra_libs import ts_mon

NONCE_LENGTH = 32

if not settings.unit_test_mode:
    import MySQLdb

GC_COUNT = ts_mon.NonCumulativeDistributionMetric(
    'monorail/servlet/gc_count',
    'Count of objects in each generation tracked by the GC',
    [ts_mon.IntegerField('generation')])

GC_EVENT_REQUEST = ts_mon.CounterMetric(
    'monorail/servlet/gc_event_request',
    'Counts of requests that triggered at least one GC event', [])

# TODO(seanmccullough): Move this to services? Or context?
trace_service = None
if app_identity.get_application_id() != 'testing-app':
    logging.warning('app id: %s', app_identity.get_application_id())
    try:
        credentials = GoogleCredentials.get_application_default()
        trace_service = discovery.build('cloudtrace',
                                        'v1',
                                        credentials=credentials)
    except Exception as e:
        logging.warning('could not get trace service: %s', e)
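
A hedged sketch of how GC_COUNT might be fed; the sampling site is an
assumption, and gc.get_count() returns the current collection counts for
CPython's three GC generations:

import gc


def record_gc_stats():
    # One distribution point per generation, keyed by the 'generation' field.
    for generation, count in enumerate(gc.get_count()):
        GC_COUNT.add(count, fields={'generation': generation})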

Example #9
step_field_spec = [
    ts_mon.StringField('result'),
    ts_mon.StringField('slave'),
    ts_mon.StringField('step_name'),
    ts_mon.StringField('subproject_tag'),
]

step_durations = ts_mon.CumulativeDistributionMetric(
    'buildbot/master/builders/steps/durations',
    'Time (in seconds) from step start to step end',
    step_field_spec,
    units=ts_mon.MetricsDataUnits.SECONDS,
    # Use fixed-width bucketer up to 2.7 hours with 10-second precision.
    bucketer=ts_mon.FixedWidthBucketer(10, 1000))

step_counts = ts_mon.CounterMetric(
    'buildbot/master/builders/steps/count',
    'Count of step results, per builder and step',
    step_field_spec)

field_spec = [
    ts_mon.StringField('builder'),
    ts_mon.StringField('master'),
    ts_mon.StringField('project_id'),
    ts_mon.StringField('result'),
    ts_mon.StringField('slave'),
    ts_mon.StringField('subproject_id'),
]

result_count = ts_mon.CounterMetric(
    'buildbot/master/builders/results/count',
    'Number of items consumed from ts_mon.log by mastermon',
    field_spec)
# A custom bucketer with 12% resolution in the range of 1..10**5,
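
The excerpt ends mid-comment, but the stated numbers pin down the shape of the
bucketer it introduces: a growth factor of 10**0.05 (about 1.122) gives ~12%
resolution per bucket, and 100 such buckets span 1..10**5. A sketch under
those assumptions (the exact arguments in the original may differ):

bucketer = ts_mon.GeometricBucketer(growth_factor=10**0.05,
                                    num_finite_buckets=100)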
Example #10
import collections
import logging
import time

from infra_libs import ts_mon

LOGGER = logging.getLogger(__name__)

LoopResults = collections.namedtuple(
    'LoopResults',
    [
        # True on no errors or if all failed attempts were successfully retried.
        'success',
        # Total number of errors seen (some may have been fixed with retries).
        'error_count',
    ],
)

count_metric = ts_mon.CounterMetric(
    'proc/outer_loop/count',
    'Counter of loop iterations for this process, by success or failure',
    [ts_mon.StringField('status')])
success_metric = ts_mon.BooleanMetric('proc/outer_loop/success',
                                      'Set immediately before the loop exits',
                                      None)
durations_metric = ts_mon.CumulativeDistributionMetric(
    'proc/outer_loop/durations',
    'Times (in seconds) taken to execute the task', None)


def loop(task, sleep_timeout, duration=None, max_errors=None, time_mod=time):
    """Runs the task in a loop for a given duration.

  Handles and logs all uncaught exceptions. The ``task`` callback should return
  True on success, and False (or raise an exception) on error.
Example #11
def Counter(name, reset_after=False):
    """Returns a metric handle for a counter named |name|."""
    return ts_mon.CounterMetric(name)
Example #12
def Counter(name):
    """Returns a metric handle for a counter named |name|."""
    return ts_mon.CounterMetric(name)
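
Both of these helpers return the same handle type; the only difference is the
reset_after parameter accepted (and ignored in this excerpt) by the first. A
short usage sketch with an invented metric name:

requests = Counter('demo/requests')
requests.increment()      # adds 1
requests.increment_by(5)  # adds an arbitrary non-negative delta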
Example #13
ISSUE_CREATE_LATENCY_METRIC = ts_mon.CumulativeDistributionMetric(
    'monorail/frontend/issue_create_latency',
    ('Latency between Issue Create form submission and page load of '
     'the subsequent issue page.'),
    field_spec=STANDARD_FIELDS,
    units=ts_mon.MetricsDataUnits.MILLISECONDS)
ISSUE_UPDATE_LATENCY_METRIC = ts_mon.CumulativeDistributionMetric(
    'monorail/frontend/issue_update_latency',
    ('Latency between Issue Update form submission and page load of '
     'the subsequent issue page.'),
    field_spec=STANDARD_FIELDS,
    units=ts_mon.MetricsDataUnits.MILLISECONDS)
AUTOCOMPLETE_POPULATE_LATENCY_METRIC = ts_mon.CumulativeDistributionMetric(
    'monorail/frontend/autocomplete_populate_latency',
    ('Latency between page load and autocomplete options loading.'),
    field_spec=STANDARD_FIELDS,
    units=ts_mon.MetricsDataUnits.MILLISECONDS)
CHARTS_SWITCH_DATE_RANGE_METRIC = ts_mon.CounterMetric(
    'monorail/frontend/charts/switch_date_range',
    ('Number of times user clicks frequency button.'),
    field_spec=STANDARD_FIELDS + [ts_mon.IntegerField('date_range')])

# Page load metrics.
ISSUE_COMMENTS_LOAD_EXTRA_FIELDS = [
    ts_mon.StringField('template_name'),
    ts_mon.BooleanField('full_app_load'),
]
ISSUE_COMMENTS_LOAD_LATENCY_METRIC = ts_mon.CumulativeDistributionMetric(
    'monorail/frontend/issue_comments_load_latency',
    ('Time from navigation or click to issue comments loaded.'),
    field_spec=STANDARD_FIELDS + ISSUE_COMMENTS_LOAD_EXTRA_FIELDS,
    units=ts_mon.MetricsDataUnits.MILLISECONDS)
DOM_CONTENT_LOADED_EXTRA_FIELDS = [ts_mon.StringField('template_name')]
DOM_CONTENT_LOADED_METRIC = ts_mon.CumulativeDistributionMetric(
    'frontend/dom_content_loaded', ('domContentLoaded performance timing.'),
Example #14
import argparse
import collections
import json
import os
import sys
import urlparse

from infra.libs import git2
from infra.libs.service_utils import outer_loop
from infra.services.gsubtreed import gsubtreed
from infra_libs import logs
from infra_libs import ts_mon

# Return value of parse_args.
Options = collections.namedtuple('Options', 'repo loop_opts json_output')

commits_counter = ts_mon.CounterMetric(
    'gsubtreed/commit_count', 'Number of commits processed by gsubtreed',
    [ts_mon.StringField('path')])


def parse_args(args):  # pragma: no cover
    def check_url(s):
        parsed = urlparse.urlparse(s)
        if parsed.scheme not in ('https', 'git', 'file'):
            raise argparse.ArgumentTypeError(
                'Repo URL must use https, git or file protocol.')
        if not parsed.path.strip('/'):
            raise argparse.ArgumentTypeError('URL is missing a path?')
        return git2.Repo(s)

    parser = argparse.ArgumentParser('./run.py %s' % __package__)
    parser.add_argument('--dry_run',
Example #15
        description='Number of possible inodes on '
        'disk partition (unix only)')

mem_free = ts_mon.GaugeMetric('dev/mem/free',
                              description='Amount of memory available to a '
                              'process (in Bytes). Buffers are considered '
                              'free memory.',
                              units=ts_mon.MetricsDataUnits.BYTES)

mem_total = ts_mon.GaugeMetric('dev/mem/total',
                               description='Total physical memory in Bytes.',
                               units=ts_mon.MetricsDataUnits.BYTES)

START_TIME = psutil.boot_time()
net_up = ts_mon.CounterMetric('dev/net/bytes/up',
                              start_time=START_TIME,
                              description='Number of bytes sent on interface.',
                              units=ts_mon.MetricsDataUnits.BYTES)
net_down = ts_mon.CounterMetric('dev/net/bytes/down',
                                start_time=START_TIME,
                                description='Number of Bytes received on '
                                'interface.',
                                units=ts_mon.MetricsDataUnits.BYTES)
net_err_up = ts_mon.CounterMetric('dev/net/err/up',
                                  start_time=START_TIME,
                                  description='Total number of errors when '
                                  'sending (per interface).')
net_err_down = ts_mon.CounterMetric('dev/net/err/down',
                                    start_time=START_TIME,
                                    description='Total number of errors when '
                                    'receiving (per interface).')
net_drop_up = ts_mon.CounterMetric('dev/net/drop/up',
Example #16
import logging

import settings

if not settings.unit_test_mode:
    import MySQLdb

from framework import framework_helpers

from infra_libs import ts_mon

# MonorailConnection maintains a dictionary of connections to SQL databases.
# Each is identified by an int shard ID.
# And there is one connection to the master DB identified by key MASTER_CNXN.
MASTER_CNXN = 'master_cnxn'

CONNECTION_COUNT = ts_mon.CounterMetric(
    'monorail/sql/connection_count',
    'Count of connections made to the SQL database.',
    [ts_mon.BooleanField('success')])


@framework_helpers.retry(2, delay=1, backoff=2)
def MakeConnection(instance, database):
    logging.info('About to connect to SQL instance %r db %r', instance,
                 database)
    if settings.unit_test_mode:
        raise ValueError(
            'unit tests should not need real database connections')
    try:
        if settings.dev_mode:
            cnxn = MySQLdb.connect(host='127.0.0.1',
                                   port=3306,
                                   db=database,
Example #17
def loop(task, sleep_timeout, duration=None, max_errors=None, time_mod=time):
  """Runs the task in a loop for a given duration.

  Handles and logs all uncaught exceptions. The ``task`` callback should return
  True on success, and False (or raise an exception) on error.

  Doesn't leak any exceptions (including KeyboardInterrupt).

  Args:
    task: Callable with no arguments returning True or False.
    sleep_timeout: A function returning how long to sleep between task
      invocations (sec), called once per loop.
    duration: How long to run the loop (sec), or None for forever.
    max_errors: Max number of consecutive errors before the loop aborts.
    time_mod: Object implementing the interface of the standard `time`
      module. Used by tests to mock time.time and time.sleep.

  Returns:
    LoopResults.
  """
  deadline = None if duration is None else (time_mod.time() + duration)
  errors_left = max_errors
  seen_success = False
  failed = False
  loop_count = 0
  error_count = 0
  count_metric = ts_mon.CounterMetric('proc/outer_loop/count')
  success_metric = ts_mon.BooleanMetric('proc/outer_loop/success')
  durations_metric = ts_mon.DistributionMetric('proc/outer_loop/durations')
  try:
    while True:
      # Log that new attempt is starting.
      start = time_mod.time()
      LOGGER.info('-------------------')
      if deadline is not None:
        LOGGER.info(
            'Begin loop %d (%.1f sec to deadline)',
            loop_count, deadline - start)
      else:
        LOGGER.info('Begin loop %d', loop_count)

      # Do it. Abort if number of consecutive errors is too large.
      attempt_success = False
      try:
        with ts_mon.ScopedIncrementCounter(count_metric) as cm:
          attempt_success = task()
          if not attempt_success:  # pragma: no cover
            cm.set_failure()       # Due to branch coverage bug in coverage.py
      except KeyboardInterrupt:
        raise
      except Exception:
        LOGGER.exception('Uncaught exception in the task')
      finally:
        elapsed = time_mod.time() - start
        LOGGER.info('End loop %d (%f sec)', loop_count, elapsed)
        durations_metric.add(elapsed)
        LOGGER.info('-------------------')

      # Reset error counter on success, or abort on too many errors.
      if attempt_success:
        seen_success = True
        errors_left = max_errors
      else:
        error_count += 1
        if errors_left is not None:
          errors_left -= 1
          if errors_left <= 0:
            failed = True
            LOGGER.warn(
                'Too many consecutive errors (%d), stopping.', max_errors)
            break

      # Sleep before trying again.
      # TODO(vadimsh): Make sleep timeout dynamic.
      now = time_mod.time()
      timeout = sleep_timeout()
      if deadline is not None and now + timeout >= deadline:
        when = now - deadline
        if when > 0:
          LOGGER.info('Deadline reached %.1f sec ago, stopping.', when)
        else:
          LOGGER.info('Deadline is in %.1f sec, stopping now', -when)
        break
      LOGGER.debug('Sleeping %.1f sec', timeout)
      time_mod.sleep(timeout)

      loop_count += 1
  except KeyboardInterrupt:
    seen_success = True
    LOGGER.warn('Stopping due to KeyboardInterrupt')

  success = not failed and seen_success
  success_metric.set(success)
  return LoopResults(success, error_count)
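
A small usage sketch of loop(); the task and timings are invented. The
callback returns True or False, and the loop folds those results into the
LoopResults tuple:

import time


def flaky_task():
    # Pretend work that succeeds on even seconds.
    return int(time.time()) % 2 == 0

results = loop(task=flaky_task,
               sleep_timeout=lambda: 1.0,  # constant 1 sec between iterations
               duration=10,                # stop after roughly 10 seconds
               max_errors=5)
print 'success=%s error_count=%d' % (results.success, results.error_count)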
Example #18
class MonorailApi(remote.Service):

  # Class variables. Handy to mock.
  _services = None
  _mar = None

  api_requests = ts_mon.CounterMetric(
     'monorail/api_requests',
     'Number of requests to Monorail api',
     [ts_mon.StringField('client_id'), ts_mon.StringField('client_email')])

  ratelimiter = ratelimiter.ApiRateLimiter()

  @classmethod
  def _set_services(cls, services):
    cls._services = services

  def mar_factory(self, request):
    if not self._mar:
      self._mar = monorailrequest.MonorailApiRequest(request, self._services)
    return self._mar

  def aux_delete_comment(self, request, delete=True):
    mar = self.mar_factory(request)
    action_name = 'delete' if delete else 'undelete'

    issue = self._services.issue.GetIssueByLocalID(
        mar.cnxn, mar.project_id, request.issueId)
    all_comments = self._services.issue.GetCommentsForIssue(
        mar.cnxn, issue.issue_id)
    try:
      issue_comment = all_comments[request.commentId]
    except IndexError:
      raise issue_svc.NoSuchIssueException(
            'The issue %s:%d does not have comment %d.' %
            (mar.project_name, request.issueId, request.commentId))

    if not permissions.CanDelete(
        mar.auth.user_id, mar.auth.effective_ids, mar.perms,
        issue_comment.deleted_by, issue_comment.user_id, mar.project,
        permissions.GetRestrictions(issue), mar.granted_perms):
      raise permissions.PermissionException(
            'User is not allowed to %s the comment %d of issue %s:%d' %
            (action_name, request.commentId, mar.project_name,
             request.issueId))

    self._services.issue.SoftDeleteComment(
        mar.cnxn, mar.project_id, request.issueId, request.commentId,
        mar.auth.user_id, self._services.user, delete=delete)
    return api_pb2_v1.IssuesCommentsDeleteResponse()

  def increment_request_limit(self, request, client_id, client_email):
    """Check whether the requester has exceeded API quotas limit,
    and increment request count in DB and ts_mon.
    """
    mar = self.mar_factory(request)
    # soft_limit == hard_limit for api_request, so this function either
    # returns False if under the limit, or raises ExcessiveActivityException.
    if not actionlimit.NeedCaptcha(
        mar.auth.user_pb, actionlimit.API_REQUEST, skip_lifetime_check=True):
      actionlimit.CountAction(
          mar.auth.user_pb, actionlimit.API_REQUEST, delta=1)
      self._services.user.UpdateUser(
          mar.cnxn, mar.auth.user_id, mar.auth.user_pb)

    # Avoid value explosion and protect PII.
    if not framework_helpers.IsServiceAccount(client_email):
      client_email = '*****@*****.**'
    self.api_requests.increment_by(
        1, {'client_id': client_id, 'client_email': client_email})

  @monorail_api_method(
      api_pb2_v1.ISSUES_COMMENTS_DELETE_REQUEST_RESOURCE_CONTAINER,
      api_pb2_v1.IssuesCommentsDeleteResponse,
      path='projects/{projectId}/issues/{issueId}/comments/{commentId}',
      http_method='DELETE',
      name='issues.comments.delete')
  def issues_comments_delete(self, request):
    """Delete a comment."""
    return self.aux_delete_comment(request, True)

  @monorail_api_method(
      api_pb2_v1.ISSUES_COMMENTS_INSERT_REQUEST_RESOURCE_CONTAINER,
      api_pb2_v1.IssuesCommentsInsertResponse,
      path='projects/{projectId}/issues/{issueId}/comments',
      http_method='POST',
      name='issues.comments.insert')
  def issues_comments_insert(self, request):
    """Add a comment."""
    mar = self.mar_factory(request)
    issue = self._services.issue.GetIssueByLocalID(
        mar.cnxn, mar.project_id, request.issueId)
    old_owner_id = tracker_bizobj.GetOwnerId(issue)
    if not permissions.CanCommentIssue(
        mar.auth.effective_ids, mar.perms, mar.project, issue,
        mar.granted_perms):
      raise permissions.PermissionException(
          'User is not allowed to comment this issue (%s, %d)' %
          (request.projectId, request.issueId))

    updates_dict = {}
    if request.updates:
      if request.updates.moveToProject:
        move_to = request.updates.moveToProject.lower()
        move_to_project = issuedetail.CheckMoveIssueRequest(
            self._services, mar, issue, True, move_to, mar.errors)
        if mar.errors.AnyErrors():
          raise endpoints.BadRequestException(mar.errors.move_to)
        updates_dict['move_to_project'] = move_to_project

      updates_dict['summary'] = request.updates.summary
      updates_dict['status'] = request.updates.status
      if request.updates.owner:
        if request.updates.owner == framework_constants.NO_USER_NAME:
          updates_dict['owner'] = framework_constants.NO_USER_SPECIFIED
        else:
          updates_dict['owner'] = self._services.user.LookupUserID(
              mar.cnxn, request.updates.owner)
      updates_dict['cc_add'], updates_dict['cc_remove'] = (
          api_pb2_v1_helpers.split_remove_add(request.updates.cc))
      updates_dict['cc_add'] = self._services.user.LookupUserIDs(
          mar.cnxn, updates_dict['cc_add'], autocreate=True).values()
      updates_dict['cc_remove'] = self._services.user.LookupUserIDs(
          mar.cnxn, updates_dict['cc_remove']).values()
      updates_dict['labels_add'], updates_dict['labels_remove'] = (
          api_pb2_v1_helpers.split_remove_add(request.updates.labels))
      blocked_on_add_strs, blocked_on_remove_strs = (
          api_pb2_v1_helpers.split_remove_add(request.updates.blockedOn))
      updates_dict['blocked_on_add'] = api_pb2_v1_helpers.issue_global_ids(
          blocked_on_add_strs, issue.project_id, mar,
          self._services)
      updates_dict['blocked_on_remove'] = api_pb2_v1_helpers.issue_global_ids(
          blocked_on_remove_strs, issue.project_id, mar,
          self._services)
      blocking_add_strs, blocking_remove_strs = (
          api_pb2_v1_helpers.split_remove_add(request.updates.blocking))
      updates_dict['blocking_add'] = api_pb2_v1_helpers.issue_global_ids(
          blocking_add_strs, issue.project_id, mar,
          self._services)
      updates_dict['blocking_remove'] = api_pb2_v1_helpers.issue_global_ids(
          blocking_remove_strs, issue.project_id, mar,
          self._services)
      components_add_strs, components_remove_strs = (
          api_pb2_v1_helpers.split_remove_add(request.updates.components))
      updates_dict['components_add'] = (
          api_pb2_v1_helpers.convert_component_ids(
              mar.config, components_add_strs))
      updates_dict['components_remove'] = (
          api_pb2_v1_helpers.convert_component_ids(
              mar.config, components_remove_strs))
      if request.updates.mergedInto:
        merge_project_name, merge_local_id = tracker_bizobj.ParseIssueRef(
            request.updates.mergedInto)
        merge_into_project = self._services.project.GetProjectByName(
            mar.cnxn, merge_project_name or issue.project_name)
        merge_into_issue = self._services.issue.GetIssueByLocalID(
            mar.cnxn, merge_into_project.project_id, merge_local_id)
        merge_allowed = tracker_helpers.IsMergeAllowed(
            merge_into_issue, mar, self._services)
        if not merge_allowed:
          raise permissions.PermissionException(
            'User is not allowed to merge into issue %s:%s' %
            (merge_into_issue.project_name, merge_into_issue.local_id))
        updates_dict['merged_into'] = merge_into_issue.issue_id
      (updates_dict['field_vals_add'], updates_dict['field_vals_remove'],
       updates_dict['fields_clear'], updates_dict['fields_labels_add'],
       updates_dict['fields_labels_remove']) = (
          api_pb2_v1_helpers.convert_field_values(
              request.updates.fieldValues, mar, self._services))

    field_helpers.ValidateCustomFields(
        mar, self._services,
        (updates_dict.get('field_vals_add', []) +
         updates_dict.get('field_vals_remove', [])),
        mar.config, mar.errors)
    if mar.errors.AnyErrors():
      raise endpoints.BadRequestException(
          'Invalid field values: %s' % mar.errors.custom_fields)

    _, comment = self._services.issue.DeltaUpdateIssue(
        cnxn=mar.cnxn, services=self._services,
        reporter_id=mar.auth.user_id,
        project_id=mar.project_id, config=mar.config, issue=issue,
        status=updates_dict.get('status'), owner_id=updates_dict.get('owner'),
        cc_add=updates_dict.get('cc_add', []),
        cc_remove=updates_dict.get('cc_remove', []),
        comp_ids_add=updates_dict.get('components_add', []),
        comp_ids_remove=updates_dict.get('components_remove', []),
        labels_add=(updates_dict.get('labels_add', []) +
                    updates_dict.get('fields_labels_add', [])),
        labels_remove=(updates_dict.get('labels_remove', []) +
                       updates_dict.get('fields_labels_remove', [])),
        field_vals_add=updates_dict.get('field_vals_add', []),
        field_vals_remove=updates_dict.get('field_vals_remove', []),
        fields_clear=updates_dict.get('fields_clear', []),
        blocked_on_add=updates_dict.get('blocked_on_add', []),
        blocked_on_remove=updates_dict.get('blocked_on_remove', []),
        blocking_add=updates_dict.get('blocking_add', []),
        blocking_remove=updates_dict.get('blocking_remove', []),
        merged_into=updates_dict.get('merged_into'),
        index_now=False,
        comment=request.content,
        summary=updates_dict.get('summary'),
    )

    move_comment = None
    if 'move_to_project' in updates_dict:
      move_to_project = updates_dict['move_to_project']
      old_text_ref = 'issue %s:%s' % (issue.project_name, issue.local_id)
      tracker_fulltext.UnindexIssues([issue.issue_id])
      moved_back_iids = self._services.issue.MoveIssues(
          mar.cnxn, move_to_project, [issue], self._services.user)
      new_text_ref = 'issue %s:%s' % (issue.project_name, issue.local_id)
      if issue.issue_id in moved_back_iids:
        content = 'Moved %s back to %s again.' % (old_text_ref, new_text_ref)
      else:
        content = 'Moved %s to now be %s.' % (old_text_ref, new_text_ref)
      move_comment = self._services.issue.CreateIssueComment(
        mar.cnxn, move_to_project.project_id, issue.local_id, mar.auth.user_id,
        content, amendments=[
            tracker_bizobj.MakeProjectAmendment(move_to_project.project_name)])

    if 'merged_into' in updates_dict:
      new_starrers = tracker_helpers.GetNewIssueStarrers(
          mar.cnxn, self._services, issue.issue_id, merge_into_issue.issue_id)
      tracker_helpers.AddIssueStarrers(
          mar.cnxn, self._services, mar,
          merge_into_issue.issue_id, merge_into_project, new_starrers)
      _merge_comment = tracker_helpers.MergeCCsAndAddComment(
        self._services, mar, issue, merge_into_project, merge_into_issue)
      merge_into_issue_cmnts = self._services.issue.GetCommentsForIssue(
          mar.cnxn, merge_into_issue.issue_id)
      notify.PrepareAndSendIssueChangeNotification(
          merge_into_issue.issue_id, framework_helpers.GetHostPort(),
          mar.auth.user_id, len(merge_into_issue_cmnts) - 1, send_email=True)

    tracker_fulltext.IndexIssues(
        mar.cnxn, [issue], self._services.user, self._services.issue,
        self._services.config)

    comment = comment or move_comment
    if comment is None:
      return api_pb2_v1.IssuesCommentsInsertResponse()

    cmnts = self._services.issue.GetCommentsForIssue(mar.cnxn, issue.issue_id)
    seq = len(cmnts) - 1

    if request.sendEmail:
      notify.PrepareAndSendIssueChangeNotification(
          issue.issue_id, framework_helpers.GetHostPort(),
          comment.user_id, seq, send_email=True, old_owner_id=old_owner_id)

    can_delete = permissions.CanDelete(
      mar.auth.user_id, mar.auth.effective_ids, mar.perms,
      comment.deleted_by, comment.user_id, mar.project,
      permissions.GetRestrictions(issue), granted_perms=mar.granted_perms)
    return api_pb2_v1.IssuesCommentsInsertResponse(
        id=seq,
        kind='monorail#issueComment',
        author=api_pb2_v1_helpers.convert_person(
            comment.user_id, mar.cnxn, self._services),
        content=comment.content,
        published=datetime.datetime.fromtimestamp(comment.timestamp),
        updates=api_pb2_v1_helpers.convert_amendments(
            issue, comment.amendments, mar, self._services),
        canDelete=can_delete)

  @monorail_api_method(
      api_pb2_v1.ISSUES_COMMENTS_LIST_REQUEST_RESOURCE_CONTAINER,
      api_pb2_v1.IssuesCommentsListResponse,
      path='projects/{projectId}/issues/{issueId}/comments',
      http_method='GET',
      name='issues.comments.list')
  def issues_comments_list(self, request):
    """List all comments for an issue."""
    mar = self.mar_factory(request)
    issue = self._services.issue.GetIssueByLocalID(
        mar.cnxn, mar.project_id, request.issueId)
    comments = self._services.issue.GetCommentsForIssue(
        mar.cnxn, issue.issue_id)
    visible_comments = []
    for comment in comments[
        request.startIndex:(request.startIndex + request.maxResults)]:
      visible_comments.append(
          api_pb2_v1_helpers.convert_comment(
              issue, comment, mar, self._services, mar.granted_perms))

    return api_pb2_v1.IssuesCommentsListResponse(
        kind='monorail#issueCommentList',
        totalResults=len(comments),
        items=visible_comments)

  @monorail_api_method(
      api_pb2_v1.ISSUES_COMMENTS_DELETE_REQUEST_RESOURCE_CONTAINER,
      api_pb2_v1.IssuesCommentsDeleteResponse,
      path='projects/{projectId}/issues/{issueId}/comments/{commentId}',
      http_method='POST',
      name='issues.comments.undelete')
  def issues_comments_undelete(self, request):
    """Restore a deleted comment."""
    return self.aux_delete_comment(request, False)

  @monorail_api_method(
      api_pb2_v1.USERS_GET_REQUEST_RESOURCE_CONTAINER,
      api_pb2_v1.UsersGetResponse,
      path='users/{userId}',
      http_method='GET',
      name='users.get')
  def users_get(self, request):
    """Get a user."""
    owner_project_only = request.ownerProjectsOnly
    mar = self.mar_factory(request)
    (visible_ownership, visible_deleted, visible_membership,
     visible_contrib) = sitewide_helpers.GetUserProjects(
        mar.cnxn, self._services, mar.auth.user_pb, mar.auth.effective_ids,
        mar.viewed_user_auth.effective_ids)

    project_list = []
    for proj in (visible_ownership + visible_deleted):
      config = self._services.config.GetProjectConfig(
          mar.cnxn, proj.project_id)
      proj_result = api_pb2_v1_helpers.convert_project(
          proj, config, api_pb2_v1.Role.owner)
      project_list.append(proj_result)
    if not owner_project_only:
      for proj in visible_membership:
        config = self._services.config.GetProjectConfig(
            mar.cnxn, proj.project_id)
        proj_result = api_pb2_v1_helpers.convert_project(
            proj, config, api_pb2_v1.Role.member)
        project_list.append(proj_result)
      for proj in visible_contrib:
        config = self._services.config.GetProjectConfig(
            mar.cnxn, proj.project_id)
        proj_result = api_pb2_v1_helpers.convert_project(
            proj, config, api_pb2_v1.Role.contributor)
        project_list.append(proj_result)

    return api_pb2_v1.UsersGetResponse(
        id=str(mar.viewed_user_auth.user_id),
        kind='monorail#user',
        projects=project_list,
    )

  @monorail_api_method(
      api_pb2_v1.ISSUES_GET_REQUEST_RESOURCE_CONTAINER,
      api_pb2_v1.IssuesGetInsertResponse,
      path='projects/{projectId}/issues/{issueId}',
      http_method='GET',
      name='issues.get')
  def issues_get(self, request):
    """Get an issue."""
    mar = self.mar_factory(request)
    issue = self._services.issue.GetIssueByLocalID(
        mar.cnxn, mar.project_id, request.issueId)

    return api_pb2_v1_helpers.convert_issue(
        api_pb2_v1.IssuesGetInsertResponse, issue, mar, self._services)

  @monorail_api_method(
      api_pb2_v1.ISSUES_INSERT_REQUEST_RESOURCE_CONTAINER,
      api_pb2_v1.IssuesGetInsertResponse,
      path='projects/{projectId}/issues',
      http_method='POST',
      name='issues.insert')
  def issues_insert(self, request):
    """Add a new issue."""
    mar = self.mar_factory(request)
    if not mar.perms.CanUsePerm(
        permissions.CREATE_ISSUE, mar.auth.effective_ids, mar.project, []):
      raise permissions.PermissionException(
          'The requester %s is not allowed to create issues for project %s.' %
          (mar.auth.email, mar.project_name))

    owner_id = None
    if request.owner:
      try:
        owner_id = self._services.user.LookupUserID(
            mar.cnxn, request.owner.name)
      except user_svc.NoSuchUserException:
        raise endpoints.BadRequestException(
            'The specified owner %s does not exist.' % request.owner.name)

    cc_ids = []
    if request.cc:
      cc_ids = self._services.user.LookupUserIDs(
          mar.cnxn, [ap.name for ap in request.cc],
          autocreate=True).values()
    comp_ids = api_pb2_v1_helpers.convert_component_ids(
        mar.config, request.components)
    fields_add, _, _, fields_labels, _ = (
        api_pb2_v1_helpers.convert_field_values(
            request.fieldValues, mar, self._services))
    field_helpers.ValidateCustomFields(
        mar, self._services, fields_add, mar.config, mar.errors)
    if mar.errors.AnyErrors():
      raise endpoints.BadRequestException(
          'Invalid field values: %s' % mar.errors.custom_fields)

    local_id = self._services.issue.CreateIssue(
        mar.cnxn, self._services, mar.project_id,
        request.summary, request.status, owner_id,
        cc_ids, request.labels + fields_labels, fields_add,
        comp_ids, mar.auth.user_id, request.description,
        blocked_on=api_pb2_v1_helpers.convert_issueref_pbs(
            request.blockedOn, mar, self._services),
        blocking=api_pb2_v1_helpers.convert_issueref_pbs(
            request.blocking, mar, self._services))
    new_issue = self._services.issue.GetIssueByLocalID(
        mar.cnxn, mar.project_id, local_id)

    self._services.issue_star.SetStar(
        mar.cnxn, self._services, mar.config, new_issue.issue_id,
        mar.auth.user_id, True)

    if request.sendEmail:
      notify.PrepareAndSendIssueChangeNotification(
          new_issue.issue_id, framework_helpers.GetHostPort(),
          new_issue.reporter_id, 0)

    return api_pb2_v1_helpers.convert_issue(
        api_pb2_v1.IssuesGetInsertResponse, new_issue, mar, self._services)

  @monorail_api_method(
      api_pb2_v1.ISSUES_LIST_REQUEST_RESOURCE_CONTAINER,
      api_pb2_v1.IssuesListResponse,
      path='projects/{projectId}/issues',
      http_method='GET',
      name='issues.list')
  def issues_list(self, request):
    """List issues for projects."""
    mar = self.mar_factory(request)

    if request.additionalProject:
      for project_name in request.additionalProject:
        project = self._services.project.GetProjectByName(
            mar.cnxn, project_name)
        if project and not permissions.UserCanViewProject(
            mar.auth.user_pb, mar.auth.effective_ids, project):
          raise permissions.PermissionException(
              'The user %s has no permission for project %s' %
              (mar.auth.email, project_name))
    prof = profiler.Profiler()
    pipeline = frontendsearchpipeline.FrontendSearchPipeline(
        mar, self._services, prof, mar.num)
    if not mar.errors.AnyErrors():
      pipeline.SearchForIIDs()
      pipeline.MergeAndSortIssues()
      pipeline.Paginate()
    else:
      raise endpoints.BadRequestException(mar.errors.query)

    issue_list = [
        api_pb2_v1_helpers.convert_issue(
            api_pb2_v1.IssueWrapper, r, mar, self._services)
        for r in pipeline.visible_results]
    return api_pb2_v1.IssuesListResponse(
        kind='monorail#issueList',
        totalResults=pipeline.total_count,
        items=issue_list)

  @monorail_api_method(
      api_pb2_v1.GROUPS_SETTINGS_LIST_REQUEST_RESOURCE_CONTAINER,
      api_pb2_v1.GroupsSettingsListResponse,
      path='groups/settings',
      http_method='GET',
      name='groups.settings.list')
  def groups_settings_list(self, request):
    """List all group settings."""
    mar = self.mar_factory(request)
    all_groups = self._services.usergroup.GetAllUserGroupsInfo(mar.cnxn)
    group_settings = []
    for g in all_groups:
      setting = g[2]
      wrapper = api_pb2_v1_helpers.convert_group_settings(g[0], setting)
      if not request.importedGroupsOnly or wrapper.ext_group_type:
        group_settings.append(wrapper)
    return api_pb2_v1.GroupsSettingsListResponse(
        groupSettings=group_settings)

  @monorail_api_method(
      api_pb2_v1.GROUPS_CREATE_REQUEST_RESOURCE_CONTAINER,
      api_pb2_v1.GroupsCreateResponse,
      path='groups',
      http_method='POST',
      name='groups.create')
  def groups_create(self, request):
    """Create a new user group."""
    mar = self.mar_factory(request)
    if not permissions.CanCreateGroup(mar.perms):
      raise permissions.PermissionException(
          'The user is not allowed to create groups.')

    user_dict = self._services.user.LookupExistingUserIDs(
        mar.cnxn, [request.groupName])
    if request.groupName.lower() in user_dict:
      raise usergroup_svc.GroupExistsException(
          'group %s already exists' % request.groupName)

    if request.ext_group_type:
      ext_group_type = str(request.ext_group_type).lower()
    else:
      ext_group_type = None
    group_id = self._services.usergroup.CreateGroup(
        mar.cnxn, self._services, request.groupName,
        str(request.who_can_view_members).lower(),
        ext_group_type)

    return api_pb2_v1.GroupsCreateResponse(
        groupID=group_id)

  @monorail_api_method(
      api_pb2_v1.GROUPS_GET_REQUEST_RESOURCE_CONTAINER,
      api_pb2_v1.GroupsGetResponse,
      path='groups/{groupName}',
      http_method='GET',
      name='groups.get')
  def groups_get(self, request):
    """Get a group's settings and users."""
    mar = self.mar_factory(request)
    if not mar.viewed_user_auth:
      raise user_svc.NoSuchUserException(request.groupName)
    group_id = mar.viewed_user_auth.user_id
    group_settings = self._services.usergroup.GetGroupSettings(
        mar.cnxn, group_id)
    member_ids, owner_ids = self._services.usergroup.LookupAllMembers(
          mar.cnxn, [group_id])
    (owned_project_ids, membered_project_ids,
     contrib_project_ids) = self._services.project.GetUserRolesInAllProjects(
         mar.cnxn, mar.auth.effective_ids)
    project_ids = owned_project_ids.union(
        membered_project_ids).union(contrib_project_ids)
    if not permissions.CanViewGroup(
        mar.perms, mar.auth.effective_ids, group_settings, member_ids[group_id],
        owner_ids[group_id], project_ids):
      raise permissions.PermissionException(
          'The user is not allowed to view this group.')

    member_ids, owner_ids = self._services.usergroup.LookupMembers(
        mar.cnxn, [group_id])

    member_emails = self._services.user.LookupUserEmails(
        mar.cnxn, member_ids[group_id]).values()
    owner_emails = self._services.user.LookupUserEmails(
        mar.cnxn, owner_ids[group_id]).values()

    return api_pb2_v1.GroupsGetResponse(
      groupID=group_id,
      groupSettings=api_pb2_v1_helpers.convert_group_settings(
          request.groupName, group_settings),
      groupOwners=owner_emails,
      groupMembers=member_emails)

  @monorail_api_method(
      api_pb2_v1.GROUPS_UPDATE_REQUEST_RESOURCE_CONTAINER,
      api_pb2_v1.GroupsUpdateResponse,
      path='groups/{groupName}',
      http_method='POST',
      name='groups.update')
  def groups_update(self, request):
    """Update a group's settings and users."""
    mar = self.mar_factory(request)
    group_id = mar.viewed_user_auth.user_id
    member_ids_dict, owner_ids_dict = self._services.usergroup.LookupMembers(
        mar.cnxn, [group_id])
    owner_ids = owner_ids_dict.get(group_id, [])
    member_ids = member_ids_dict.get(group_id, [])
    if not permissions.CanEditGroup(
        mar.perms, mar.auth.effective_ids, owner_ids):
      raise permissions.PermissionException(
          'The user is not allowed to edit this group.')

    group_settings = self._services.usergroup.GetGroupSettings(
        mar.cnxn, group_id)
    if (request.who_can_view_members or request.ext_group_type
        or request.last_sync_time or request.friend_projects):
      group_settings.who_can_view_members = (
          request.who_can_view_members or group_settings.who_can_view_members)
      group_settings.ext_group_type = (
          request.ext_group_type or group_settings.ext_group_type)
      group_settings.last_sync_time = (
          request.last_sync_time or group_settings.last_sync_time)
      if framework_constants.NO_VALUES in request.friend_projects:
        group_settings.friend_projects = []
      else:
        id_dict = self._services.project.LookupProjectIDs(
            mar.cnxn, request.friend_projects)
        group_settings.friend_projects = (
            id_dict.values() or group_settings.friend_projects)
      self._services.usergroup.UpdateSettings(
          mar.cnxn, group_id, group_settings)

    if request.groupOwners or request.groupMembers:
      self._services.usergroup.RemoveMembers(
          mar.cnxn, group_id, owner_ids + member_ids)
      owners_dict = self._services.user.LookupUserIDs(
          mar.cnxn, request.groupOwners, autocreate=True)
      self._services.usergroup.UpdateMembers(
          mar.cnxn, group_id, owners_dict.values(), 'owner')
      members_dict = self._services.user.LookupUserIDs(
          mar.cnxn, request.groupMembers, autocreate=True)
      self._services.usergroup.UpdateMembers(
          mar.cnxn, group_id, members_dict.values(), 'member')

    return api_pb2_v1.GroupsUpdateResponse()

  @monorail_api_method(
      api_pb2_v1.COMPONENTS_LIST_REQUEST_RESOURCE_CONTAINER,
      api_pb2_v1.ComponentsListResponse,
      path='projects/{projectId}/components',
      http_method='GET',
      name='components.list')
  def components_list(self, request):
    """List all components of a given project."""
    mar = self.mar_factory(request)
    config = self._services.config.GetProjectConfig(mar.cnxn, mar.project_id)
    components = [api_pb2_v1_helpers.convert_component_def(
        cd, mar, self._services) for cd in config.component_defs]
    return api_pb2_v1.ComponentsListResponse(
        components=components)

  @monorail_api_method(
      api_pb2_v1.COMPONENTS_CREATE_REQUEST_RESOURCE_CONTAINER,
      api_pb2_v1.Component,
      path='projects/{projectId}/components',
      http_method='POST',
      name='components.create')
  def components_create(self, request):
    """Create a component."""
    mar = self.mar_factory(request)
    if not mar.perms.CanUsePerm(
        permissions.EDIT_PROJECT, mar.auth.effective_ids, mar.project, []):
      raise permissions.PermissionException(
          'User is not allowed to create components for this project')

    config = self._services.config.GetProjectConfig(mar.cnxn, mar.project_id)
    leaf_name = request.componentName
    if not tracker_constants.COMPONENT_NAME_RE.match(leaf_name):
      raise config_svc.InvalidComponentNameException(
          'The component name %s is invalid.' % leaf_name)

    parent_path = request.parentPath
    if parent_path:
      parent_def = tracker_bizobj.FindComponentDef(parent_path, config)
      if not parent_def:
        raise config_svc.NoSuchComponentException(
            'Parent component %s does not exist.' % parent_path)
      if not permissions.CanEditComponentDef(
          mar.auth.effective_ids, mar.perms, mar.project, parent_def, config):
        raise permissions.PermissionException(
            'User is not allowed to add a subcomponent to component %s' %
            parent_path)

      path = '%s>%s' % (parent_path, leaf_name)
    else:
      path = leaf_name

    if tracker_bizobj.FindComponentDef(path, config):
      raise config_svc.InvalidComponentNameException(
          'The name %s is already in use.' % path)

    created = int(time.time())
    user_emails = set()
    user_emails.update([mar.auth.email] + request.admin + request.cc)
    user_ids_dict = self._services.user.LookupUserIDs(
        mar.cnxn, list(user_emails), autocreate=False)
    admin_ids = [user_ids_dict[uname] for uname in request.admin]
    cc_ids = [user_ids_dict[uname] for uname in request.cc]
    label_ids = []  # TODO(jrobbins): allow API clients to specify this too.

    component_id = self._services.config.CreateComponentDef(
        mar.cnxn, mar.project_id, path, request.description, request.deprecated,
        admin_ids, cc_ids, created, user_ids_dict[mar.auth.email], label_ids)

    return api_pb2_v1.Component(
        componentId=component_id,
        projectName=request.projectId,
        componentPath=path,
        description=request.description,
        admin=request.admin,
        cc=request.cc,
        deprecated=request.deprecated,
        created=datetime.datetime.fromtimestamp(created),
        creator=mar.auth.email)

  @monorail_api_method(
      api_pb2_v1.COMPONENTS_DELETE_REQUEST_RESOURCE_CONTAINER,
      message_types.VoidMessage,
      path='projects/{projectId}/components/{componentPath}',
      http_method='DELETE',
      name='components.delete')
  def components_delete(self, request):
    """Delete a component."""
    mar = self.mar_factory(request)
    config = self._services.config.GetProjectConfig(mar.cnxn, mar.project_id)
    component_path = request.componentPath
    component_def = tracker_bizobj.FindComponentDef(
        component_path, config)
    if not component_def:
      raise config_svc.NoSuchComponentException(
          'The component %s does not exist.' % component_path)
    if not permissions.CanViewComponentDef(
        mar.auth.effective_ids, mar.perms, mar.project, component_def):
      raise permissions.PermissionException(
          'User is not allowed to view component %s' % component_path)
    if not permissions.CanEditComponentDef(
        mar.auth.effective_ids, mar.perms, mar.project, component_def, config):
      raise permissions.PermissionException(
          'User is not allowed to delete component %s' % component_path)

    allow_delete = not tracker_bizobj.FindDescendantComponents(
        config, component_def)
    if not allow_delete:
      raise permissions.PermissionException(
          'User tried to delete component that had subcomponents')

    self._services.issue.DeleteComponentReferences(
        mar.cnxn, component_def.component_id)
    self._services.config.DeleteComponentDef(
        mar.cnxn, mar.project_id, component_def.component_id)
    return message_types.VoidMessage()

  @monorail_api_method(
      api_pb2_v1.COMPONENTS_UPDATE_REQUEST_RESOURCE_CONTAINER,
      message_types.VoidMessage,
      path='projects/{projectId}/components/{componentPath}',
      http_method='POST',
      name='components.update')
  def components_update(self, request):
    """Update a component."""
    mar = self.mar_factory(request)
    config = self._services.config.GetProjectConfig(mar.cnxn, mar.project_id)
    component_path = request.componentPath
    component_def = tracker_bizobj.FindComponentDef(
        component_path, config)
    if not component_def:
      raise config_svc.NoSuchComponentException(
          'The component %s does not exist.' % component_path)
    if not permissions.CanViewComponentDef(
        mar.auth.effective_ids, mar.perms, mar.project, component_def):
      raise permissions.PermissionException(
          'User is not allowed to view component %s' % component_path)
    if not permissions.CanEditComponentDef(
        mar.auth.effective_ids, mar.perms, mar.project, component_def, config):
      raise permissions.PermissionException(
          'User is not allowed to edit component %s' % component_path)

    original_path = component_def.path
    new_path = component_def.path
    new_docstring = component_def.docstring
    new_deprecated = component_def.deprecated
    new_admin_ids = component_def.admin_ids
    new_cc_ids = component_def.cc_ids
    update_filterrule = False
    for update in request.updates:
      if update.field == api_pb2_v1.ComponentUpdateFieldID.LEAF_NAME:
        leaf_name = update.leafName
        if not tracker_constants.COMPONENT_NAME_RE.match(leaf_name):
          raise config_svc.InvalidComponentNameException(
              'The component name %s is invalid.' % leaf_name)

        if '>' in original_path:
          parent_path = original_path[:original_path.rindex('>')]
          new_path = '%s>%s' % (parent_path, leaf_name)
        else:
          new_path = leaf_name

        conflict = tracker_bizobj.FindComponentDef(new_path, config)
        if conflict and conflict.component_id != component_def.component_id:
          raise config_svc.InvalidComponentNameException(
              'The name %s is already in use.' % new_path)
        update_filterrule = True
      elif update.field == api_pb2_v1.ComponentUpdateFieldID.DESCRIPTION:
        new_docstring = update.description
      elif update.field == api_pb2_v1.ComponentUpdateFieldID.ADMIN:
        user_ids_dict = self._services.user.LookupUserIDs(
            mar.cnxn, list(update.admin), autocreate=True)
        new_admin_ids = [user_ids_dict[email] for email in update.admin]
      elif update.field == api_pb2_v1.ComponentUpdateFieldID.CC:
        user_ids_dict = self._services.user.LookupUserIDs(
            mar.cnxn, list(update.cc), autocreate=True)
        new_cc_ids = [user_ids_dict[email] for email in update.cc]
        update_filterrule = True
      elif update.field == api_pb2_v1.ComponentUpdateFieldID.DEPRECATED:
        new_deprecated = update.deprecated
      else:
        logging.error('Unknown component field %r', update.field)

    new_modified = int(time.time())
    new_modifier_id = self._services.user.LookupUserID(
        mar.cnxn, mar.auth.email, autocreate=False)
    logging.info(
        'Updating component id %d: path-%s, docstring-%s, deprecated-%s,'
        ' admin_ids-%s, cc_ids-%s, modified by %s', component_def.component_id,
        new_path, new_docstring, new_deprecated, new_admin_ids, new_cc_ids,
        new_modifier_id)
    self._services.config.UpdateComponentDef(
        mar.cnxn, mar.project_id, component_def.component_id,
        path=new_path, docstring=new_docstring, deprecated=new_deprecated,
        admin_ids=new_admin_ids, cc_ids=new_cc_ids, modified=new_modified,
        modifier_id=new_modifier_id)

    # TODO(sheyang): reuse the code in componentdetails
    if original_path != new_path:
      # If the name changed then update all of its subcomponents as well.
      subcomponent_ids = tracker_bizobj.FindMatchingComponentIDs(
          original_path, config, exact=False)
      for subcomponent_id in subcomponent_ids:
        if subcomponent_id == component_def.component_id:
          continue
        subcomponent_def = tracker_bizobj.FindComponentDefByID(
            subcomponent_id, config)
        subcomponent_new_path = subcomponent_def.path.replace(
            original_path, new_path, 1)
        self._services.config.UpdateComponentDef(
            mar.cnxn, mar.project_id, subcomponent_def.component_id,
            path=subcomponent_new_path)

    if update_filterrule:
      filterrules_helpers.RecomputeAllDerivedFields(
          mar.cnxn, self._services, mar.project, config)

    return message_types.VoidMessage()
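
A minimal standalone sketch of the rename-propagation step above: when a component is renamed, every descendant path is rewritten by replacing only the first occurrence of the old prefix (path.replace(original_path, new_path, 1)). The names below are illustrative, not part of the Monorail API.

def rename_subtree(paths, old_path, new_path):
    """Return an {old: new} map for old_path and every path nested under it."""
    renamed = {}
    for path in paths:
        if path == old_path or path.startswith(old_path + '>'):
            # Replace only the first occurrence so a leaf that repeats the
            # prefix (e.g. 'UI>UI') is not rewritten twice.
            renamed[path] = path.replace(old_path, new_path, 1)
    return renamed

print(rename_subtree(['UI', 'UI>Button', 'UI>Button>Icon', 'Backend'],
                     'UI', 'Frontend'))
# {'UI': 'Frontend', 'UI>Button': 'Frontend>Button',
#  'UI>Button>Icon': 'Frontend>Button>Icon'}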
Exemplo n.º 19
0
def CounterMetric(name, reset_after=False, description=None,
                  field_spec=_MISSING, start_time=None):
  """Returns a metric handle for a counter named |name|."""
  return ts_mon.CounterMetric(name,
                              description=description, field_spec=field_spec,
                              start_time=start_time)
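
A hedged usage sketch for this wrapper, assuming infra_libs.ts_mon is importable; the metric name and field below are illustrative, not real metrics.

from infra_libs import ts_mon

http_errors = CounterMetric(
    'myapp/http_errors',
    description='Count of HTTP error responses served.',
    field_spec=[ts_mon.StringField('status')])
http_errors.increment({'status': '500'})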
Exemplo n.º 20
0
class LoadApiClientConfigs(webapp2.RequestHandler):

    config_loads = ts_mon.CounterMetric(
        'monorail/client_config_svc/loads',
        'Results of fetches from luci-config.',
        [ts_mon.BooleanField('success'),
         ts_mon.StringField('type')])

    def get(self):
        authorization_token, _ = app_identity.get_access_token(
            framework_constants.OAUTH_SCOPE)
        response = urlfetch.fetch(LUCI_CONFIG_URL,
                                  method=urlfetch.GET,
                                  follow_redirects=False,
                                  headers={
                                      'Content-Type':
                                      'application/json; charset=UTF-8',
                                      'Authorization':
                                      'Bearer ' + authorization_token
                                  })

        if response.status_code != 200:
            logging.error('Invalid response from luci-config: %r', response)
            self.config_loads.increment({
                'success': False,
                'type': 'luci-cfg-error'
            })
            self.abort(500, 'Invalid response from luci-config')

        try:
            content_text = self._process_response(response)
        except Exception as e:
            self.abort(500, str(e))

        logging.info('luci-config content decoded: %r.', content_text)
        configs = ClientConfig(configs=content_text,
                               key_name='api_client_configs')
        configs.put()
        self.config_loads.increment({'success': True, 'type': 'success'})

    def _process_response(self, response):
        try:
            content = json.loads(response.content)
        except ValueError:
            logging.error('Response was not JSON: %r', response.content)
            self.config_loads.increment({
                'success': False,
                'type': 'json-load-error'
            })
            raise

        try:
            config_content = content['content']
        except KeyError:
            logging.error('JSON contained no content: %r', content)
            self.config_loads.increment({
                'success': False,
                'type': 'json-key-error'
            })
            raise

        try:
            content_text = base64.b64decode(config_content)
        except TypeError:
            logging.error('Content was not b64: %r', config_content)
            self.config_loads.increment({
                'success': False,
                'type': 'b64-decode-error'
            })
            raise

        try:
            cfg = api_clients_config_pb2.ClientCfg()
            protobuf.text_format.Merge(content_text, cfg)
        except Exception:
            logging.error('Content was not a valid ClientCfg proto: %r',
                          content_text)
            self.config_loads.increment({
                'success': False,
                'type': 'proto-load-error'
            })
            raise

        return content_text
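
The decode pipeline in _process_response is JSON, then the 'content' key, then base64, then a text-format proto parse. A standalone sketch of the first three stages using only the stdlib (the proto parse is omitted); the payload is illustrative.

import base64
import json

def decode_luci_config(raw_body):
    """Extract and base64-decode the 'content' field of a JSON response."""
    content = json.loads(raw_body)['content']
    return base64.b64decode(content)

body = json.dumps(
    {'content': base64.b64encode(b'clients { }').decode('ascii')})
print(decode_luci_config(body))  # clients { }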
Exemplo n.º 21
0
class BugdroidGitPollerHandler(poller_handlers.BasePollerHandler):
    """Handler for updating bugs with information from commits."""

    bug_comments_metric = ts_mon.CounterMetric(
        'bugdroid/bug_comments', 'Counter of comments added to bugs',
        [ts_mon.StringField('project'),
         ts_mon.StringField('status')])

    def __init__(self,
                 monorail,
                 logger,
                 default_project,
                 no_merge=None,
                 public_bugs=True,
                 test_mode=False,
                 issues_labels=None,
                 *args,
                 **kwargs):
        self.monorail_client = monorail
        self.logger = logger
        self.default_project = default_project
        self.no_merge = no_merge or []
        self.public_bugs = public_bugs
        self.test_mode = test_mode
        if issues_labels:
            self.issues_labels = dict((p.key, p.value) for p in issues_labels)
        else:
            self.issues_labels = {}
        super(BugdroidGitPollerHandler, self).__init__(*args, **kwargs)

    def _ApplyMergeMergedLabel(self, issue, branch):
        if not branch or not issue:
            return

        label = '%s-%s' % (self.issues_labels.get('merge',
                                                  'merge-merged'), branch)
        issue.add_label(label)
        self.logger.debug('Adding %s', label)

        label = self.issues_labels.get('approved', 'merge-approved')
        if issue.has_label(label):
            issue.remove_label(label)
            self.logger.debug('Removing %s', label)

        mstone = branch_utils.get_mstone(branch, False)
        if mstone:
            label = 'merge-approved-%s' % mstone
            if issue.has_label(label):
                issue.remove_label(label)
                self.logger.debug('Removing %s', label)

    def ProcessLogEntry(self, log_entry):
        project_bugs = log_parser.get_issues(
            log_entry, default_project=self.default_project)
        self.logger.info('Processing commit %s : bugs %s', log_entry.revision,
                         str(project_bugs))
        if project_bugs:
            comment = self._CreateMessage(log_entry)
            self.logger.debug(comment)

            for project, bugs in project_bugs.iteritems():
                for bug in bugs:
                    try:
                        issue = self.monorail_client.get_issue(project, bug)
                        issue.set_comment(comment[:24 * 1024])
                        branch = scm_helper.GetBranch(log_entry)
                        # Apply merge labels if this commit landed on a branch.
                        if branch and not (log_entry.scm in ['git', 'gerrit']
                                           and scm_helper.GetBranch(log_entry,
                                                                    full=True)
                                           in self.no_merge):
                            self._ApplyMergeMergedLabel(issue, branch)
                        self.logger.debug('Attempting to save issue: %d',
                                          issue.id)
                        if not self.test_mode:
                            self.monorail_client.update_issue(
                                project, issue,
                                log_parser.should_send_email(log_entry.msg))
                        else:
                            self.logger.debug('Test mode, skipping')
                    except Exception:
                        self.bug_comments_metric.increment({
                            'project': project,
                            'status': 'failure'
                        })
                        raise
                    else:
                        self.bug_comments_metric.increment({
                            'project': project,
                            'status': 'success'
                        })

    def _CreateMessage(self, log_entry):
        msg = ''
        msg += 'The following revision refers to this bug:\n'
        msg += '  %s\n\n' % log_entry.GetCommitUrl()
        msg += self._BuildLogSpecial(log_entry)
        return msg

    def _BuildLogSpecial(self, log_entry):
        """Generate git-log style message, with links to files in the Web UI."""
        rtn = 'commit %s\n' % log_entry.commit
        rtn += 'Author: %s <%s>\n' % (log_entry.author_name,
                                      log_entry.author_email)
        rtn += 'Date: %s\n' % log_entry.committer_date
        if self.public_bugs:
            rtn += '\n%s\n' % log_entry.msg
            for path in log_entry.paths:
                if path.action == 'delete':
                    # Use parent and copy_from_path for deletions, otherwise we get links
                    # to https://.../<commit>//dev/null
                    rtn += '[%s] %s\n' % (
                        path.action,
                        log_entry.GetPathUrl(
                            path.copy_from_path, parent=True, universal=True))
                else:
                    rtn += '[%s] %s\n' % (path.action,
                                          log_entry.GetPathUrl(path.filename,
                                                               universal=True))
        return rtn
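
A standalone sketch of the git-log style comment assembled by _CreateMessage and _BuildLogSpecial above, using a stub log entry; all field values are illustrative.

class StubLogEntry(object):
    commit = 'abc123'
    author_name = 'A. Developer'
    author_email = 'dev@example.com'
    committer_date = 'Mon Jan 04 12:00:00 2016'
    msg = 'Fix the widget.'

    def GetCommitUrl(self):
        return 'https://example.com/repo/+/abc123'

def build_message(entry):
    msg = 'The following revision refers to this bug:\n'
    msg += '  %s\n\n' % entry.GetCommitUrl()
    msg += 'commit %s\n' % entry.commit
    msg += 'Author: %s <%s>\n' % (entry.author_name, entry.author_email)
    msg += 'Date: %s\n' % entry.committer_date
    msg += '\n%s\n' % entry.msg
    return msg

print(build_message(StubLogEntry()))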
Exemplo n.º 22
0
    return cnxn


# One connection pool per database instance (master, replicas are each an
# instance). We'll have four connections per instance because we fetch
# issue comments, stars, spam verdicts and spam verdict history in parallel
# with promises.
cnxn_pool = ConnectionPool(settings.db_cnxn_pool_size)

# MonorailConnection maintains a dictionary of connections to SQL databases.
# Each is identified by an int shard ID.
# And there is one connection to the master DB identified by key MASTER_CNXN.
MASTER_CNXN = 'master_cnxn'

CONNECTION_COUNT = ts_mon.CounterMetric(
    'monorail/sql/connection_count',
    'Count of connections made to the SQL database.',
    [ts_mon.BooleanField('success')])

DB_CNXN_LATENCY = ts_mon.CumulativeDistributionMetric(
    'monorail/sql/db_cnxn_latency',
    'Time needed to establish a DB connection.', None)

DB_QUERY_LATENCY = ts_mon.CumulativeDistributionMetric(
    'monorail/sql/db_query_latency', 'Time needed to make a DB query.',
    [ts_mon.StringField('type')])

DB_COMMIT_LATENCY = ts_mon.CumulativeDistributionMetric(
    'monorail/sql/db_commit_latency', 'Time needed to make a DB commit.', None)

DB_ROLLBACK_LATENCY = ts_mon.CumulativeDistributionMetric(
    'monorail/sql/db_rollback_latency', 'Time needed to make a DB rollback.',
    None)
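
A hedged sketch of feeding the metrics above, assuming the infra_libs ts_mon API (increment() on counters, add() on distributions). make_connection and the millisecond unit are assumptions for illustration.

import time

def timed_connect(make_connection):
    """Open a DB connection, recording success and connection latency."""
    start = time.time()
    try:
        cnxn = make_connection()
    except Exception:
        CONNECTION_COUNT.increment({'success': False})
        raise
    CONNECTION_COUNT.increment({'success': True})
    # The unit is an assumption; record elapsed time in milliseconds.
    DB_CNXN_LATENCY.add((time.time() - start) * 1000)
    return cnxn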
Exemplo n.º 23
0
from twisted.internet import error, reactor, task
from twisted.application import service, internet
from twisted.cred import credentials

import buildslave
from buildslave.pbutil import ReconnectingPBClientFactory
from buildslave.commands import registry, base
from buildslave import monkeypatches

from infra_libs import ts_mon

connected_metric = ts_mon.BooleanMetric(
    'buildbot/slave/connected',
    'Whether the slave is currently connected to its master.', None)
connection_failures_metric = ts_mon.CounterMetric(
    'buildbot/slave/connection_failures',
    'Count of failures connecting to the buildbot master.',
    [ts_mon.StringField('reason')])
running_metric = ts_mon.BooleanMetric(
    'buildbot/slave/is_building',
    'Whether a build step is currently in progress.',
    [ts_mon.StringField('builder')])
steps_metric = ts_mon.CounterMetric(
    'buildbot/slave/steps',
    'Count of build steps run by each builder on this slave.',
    [ts_mon.StringField('builder'),
     ts_mon.BooleanField('success')])


class UnknownCommand(pb.Error):
    pass
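
A hedged sketch of how these slave metrics might be updated, assuming the infra_libs ts_mon gauge API (set() on BooleanMetric); the hook names are illustrative, not part of buildslave.

def on_connected():
    connected_metric.set(True)

def on_connection_failed(reason):
    connected_metric.set(False)
    connection_failures_metric.increment({'reason': reason})

def on_step_finished(builder, success):
    running_metric.set(False, {'builder': builder})
    steps_metric.increment({'builder': builder, 'success': success})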
Exemplo n.º 24
0
class ServiceThread(threading.Thread):
    """Thread that controls a single Service object.

  The methods on this object (start_service(), stop_service(), etc.) can be
  called from any thread and are asynchronous - they just instruct the thread to
  perform the given action on the Service.

  This thread also polls the service occasionally and restarts it if it crashed.
  """

    failures = ts_mon.CounterMetric('service_manager/failures')
    reconfigs = ts_mon.CounterMetric('service_manager/reconfigs')
    upgrades = ts_mon.CounterMetric('service_manager/upgrades')

    def __init__(self,
                 poll_interval,
                 state_directory,
                 service_config,
                 wait_condition=None):
        """
    Args:
      poll_interval: How often (in seconds) to restart failed services.
      state_directory: A file will be created in this directory (with the same
          name as the service) when it is running containing its PID and
          starttime.
      service_config: A dictionary containing the service's config.  See README
          for a description of the fields.
      wait_condition: Condition used to signal state changes; a new
          threading.Condition is created when not given (injectable in tests).
    """

        super(ServiceThread, self).__init__()

        if wait_condition is None:  # pragma: no cover
            wait_condition = threading.Condition()

        self._poll_interval = poll_interval
        self._state_directory = state_directory
        self._service = service.Service(state_directory, service_config)

        self._condition = wait_condition  # Protects _state.
        self._state = _State()  # _condition must be held.
        self._state_changed = False

        self._started = False  # Whether we started the service already.

    def _wait(self):
        with self._condition:
            if not self._state_changed:  # pragma: no cover
                self._condition.wait(self._poll_interval)

            # Clone the state object so we can release the lock.
            ret = self._state.clone()
            self._state.new_config = None
            self._state_changed = False
            return ret

    @contextlib.contextmanager
    def _change_state(self):
        with self._condition:
            yield
            self._state_changed = True
            self._condition.notify()

    def run(self):
        while True:
            try:
                state = self._wait()

                if state.exit:
                    return
                elif state.new_config is not None:
                    # Stop the service if it's currently running.
                    self._service.stop()

                    # Recreate it with the new config and start it.
                    self.reconfigs.increment(
                        fields={'service': self._service.name})
                    self._service = service.Service(self._state_directory,
                                                    state.new_config)
                    self._service.start()
                    self._started = True
                elif state.should_run is False:
                    # Ensure the service is stopped.
                    self._service.stop()
                    self._started = False
                elif state.should_run is True:
                    try:
                        state = self._service.get_running_process_state()
                    except service.UnexpectedProcessStateError:
                        self.failures.increment(
                            fields={'service': self._service.name})
                        logging.exception(
                            'Unexpected error getting state for service %s',
                            self._service.name)
                    except service.ProcessNotRunning as ex:
                        if self._started:
                            # We started it last time but it's not running any more.
                            self.failures.increment(
                                fields={'service': self._service.name})
                            LOGGER.warning(
                                'Service %s failed (%r), restarting',
                                self._service.name, ex)
                        else:
                            # We're about to start it for the first time.
                            LOGGER.info(
                                'Starting service %s for the first time (%r)',
                                self._service.name, ex)
                    else:
                        if self._service.has_version_changed(state):
                            self.upgrades.increment(
                                fields={'service': self._service.name})
                            LOGGER.info(
                                'Service %s has a new package version, restarting',
                                self._service.name)
                            self._service.stop()
                        elif self._service.has_args_changed(state):
                            self.reconfigs.increment(
                                fields={'service': self._service.name})
                            LOGGER.info(
                                'Service %s has new args: was %s, restarting with %s',
                                self._service.name, state.args,
                                self._service.args)
                            self._service.stop()

                    # Ensure the service is running.
                    self._service.start()
                    self._started = True

            except Exception:
                LOGGER.exception('Service thread failed for service %s',
                                 self._service.name)

    def start_service(self):
        with self._change_state():
            self._state.should_run = True

    def stop_service(self):
        with self._change_state():
            self._state.should_run = False

    def stop(self, join=True):
        with self._change_state():
            self._state.exit = True

        if join:  # pragma: no cover
            self.join()

    def restart_with_new_config(self, new_config):
        with self._change_state():
            self._state.new_config = new_config
            self._state.should_run = True
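
A standalone sketch of the condition-variable handoff ServiceThread relies on: callers mutate shared state under the lock and notify(), so the worker wakes immediately instead of sleeping out the full poll interval. The class and method names are illustrative.

import threading

class StateBox(object):
    def __init__(self, poll_interval=5.0):
        self._poll_interval = poll_interval
        self._condition = threading.Condition()
        self._should_run = None
        self._changed = False

    def set_should_run(self, value):
        with self._condition:
            self._should_run = value
            self._changed = True
            self._condition.notify()

    def wait_for_change(self):
        """Block until notified or until the poll interval elapses."""
        with self._condition:
            if not self._changed:
                self._condition.wait(self._poll_interval)
            self._changed = False
            return self._should_run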
Exemplo n.º 25
0
class SpamService(object):
    """The persistence layer for spam reports."""
    issue_actions = ts_mon.CounterMetric(
        'monorail/spam_svc/issue', 'Count of things that happen to issues.',
        [ts_mon.StringField('type')])
    comment_actions = ts_mon.CounterMetric(
        'monorail/spam_svc/comment',
        'Count of things that happen to comments.',
        [ts_mon.StringField('type')])
    prediction_api_failures = ts_mon.CounterMetric(
        'monorail/spam_svc/prediction_api_failure',
        'Failures calling the prediction API', None)

    def __init__(self):
        self.report_tbl = sql.SQLTableManager(SPAMREPORT_TABLE_NAME)
        self.verdict_tbl = sql.SQLTableManager(SPAMVERDICT_TABLE_NAME)
        self.issue_tbl = sql.SQLTableManager(ISSUE_TABLE)

        self.prediction_service = None
        try:
            credentials = GoogleCredentials.get_application_default()
            self.prediction_service = build('prediction',
                                            'v1.6',
                                            http=httplib2.Http(),
                                            credentials=credentials)
        except (Oauth2ClientError, ApiClientError):
            logging.error("Error getting GoogleCredentials: %s" %
                          sys.exc_info()[0])

    def LookupIssueFlaggers(self, cnxn, issue_id):
        """Returns users who've reported the issue or its comments as spam.

    Returns a tuple. First element is a list of users who flagged the issue;
    second element is a dictionary of comment id to a list of users who flagged
    that comment.
    """
        rows = self.report_tbl.Select(cnxn,
                                      cols=['user_id', 'comment_id'],
                                      issue_id=issue_id)

        issue_reporters = []
        comment_reporters = collections.defaultdict(list)
        for row in rows:
            if row[1]:
                comment_reporters[row[1]].append(row[0])
            else:
                issue_reporters.append(row[0])

        return issue_reporters, comment_reporters

    def LookupIssueFlagCounts(self, cnxn, issue_ids):
        """Returns a map of issue_id to flag counts"""
        rows = self.report_tbl.Select(cnxn,
                                      cols=['issue_id', 'COUNT(*)'],
                                      issue_id=issue_ids,
                                      group_by=['issue_id'])
        counts = {}
        for row in rows:
            counts[long(row[0])] = row[1]
        return counts

    def LookupIssueVerdicts(self, cnxn, issue_ids):
        """Returns a map of issue_id to most recent spam verdicts"""
        rows = self.verdict_tbl.Select(
            cnxn,
            cols=['issue_id', 'reason', 'MAX(created)'],
            issue_id=issue_ids,
            group_by=['issue_id'])
        reasons = {}
        for row in rows:
            reasons[long(row[0])] = row[1]
        return reasons

    def LookupIssueVerdictHistory(self, cnxn, issue_ids):
        """Returns a map of issue_id to most recent spam verdicts"""
        rows = self.verdict_tbl.Select(cnxn,
                                       cols=[
                                           'issue_id', 'reason', 'created',
                                           'is_spam', 'classifier_confidence',
                                           'user_id', 'overruled'
                                       ],
                                       issue_id=issue_ids,
                                       order_by=[('issue_id', []),
                                                 ('created', [])])

        # TODO: group by issue_id, make class instead of dict for verdict.
        verdicts = []
        for row in rows:
            verdicts.append({
                'issue_id': row[0],
                'reason': row[1],
                'created': row[2],
                'is_spam': row[3],
                'classifier_confidence': row[4],
                'user_id': row[5],
                'overruled': row[6],
            })

        return verdicts

    def LookupCommentVerdictHistory(self, cnxn, comment_ids):
        """Returns a map of issue_id to most recent spam verdicts"""
        rows = self.verdict_tbl.Select(cnxn,
                                       cols=[
                                           'comment_id', 'reason', 'created',
                                           'is_spam', 'classifier_confidence',
                                           'user_id', 'overruled'
                                       ],
                                       comment_id=comment_ids,
                                       order_by=[('comment_id', []),
                                                 ('created', [])])

        # TODO: group by comment_id, make class instead of dict for verdict.
        verdicts = []
        for row in rows:
            verdicts.append({
                'comment_id': row[0],
                'reason': row[1],
                'created': row[2],
                'is_spam': row[3],
                'classifier_confidence': row[4],
                'user_id': row[5],
                'overruled': row[6],
            })

        return verdicts

    def FlagIssues(self, cnxn, issue_service, issues, reporting_user_id,
                   flagged_spam):
        """Creates or deletes a spam report on an issue."""
        verdict_updates = []
        if flagged_spam:
            rows = [(issue.issue_id, issue.reporter_id, reporting_user_id)
                    for issue in issues]
            self.report_tbl.InsertRows(cnxn,
                                       SPAMREPORT_ISSUE_COLS,
                                       rows,
                                       ignore=True)
        else:
            issue_ids = [issue.issue_id for issue in issues]
            self.report_tbl.Delete(cnxn,
                                   issue_id=issue_ids,
                                   user_id=reporting_user_id,
                                   comment_id=None)

        project_id = issues[0].project_id

        # Now record new verdicts and update issue.is_spam, if they've changed.
        ids = [issue.issue_id for issue in issues]
        counts = self.LookupIssueFlagCounts(cnxn, ids)
        previous_verdicts = self.LookupIssueVerdicts(cnxn, ids)

        for issue_id in counts:
            # If the flag counts changed enough to toggle the is_spam bit, need to
            # record a new verdict and update the Issue.
            if ((flagged_spam and counts[issue_id] >= settings.spam_flag_thresh
                 or not flagged_spam
                 and counts[issue_id] < settings.spam_flag_thresh)
                    and (previous_verdicts[issue_id] != REASON_MANUAL
                         if issue_id in previous_verdicts else True)):
                verdict_updates.append(issue_id)

        if len(verdict_updates) == 0:
            return

        # Some of the issues may have exceeded the flag threshold, so issue
        # verdicts and mark them as spam in those cases.
        rows = [(issue_id, flagged_spam, REASON_THRESHOLD, project_id)
                for issue_id in verdict_updates]
        self.verdict_tbl.InsertRows(cnxn,
                                    THRESHVERDICT_ISSUE_COLS,
                                    rows,
                                    ignore=True)
        update_issues = []
        for issue in issues:
            if issue.issue_id in verdict_updates:
                issue.is_spam = flagged_spam
                update_issues.append(issue)

        if flagged_spam:
            self.issue_actions.increment_by(len(update_issues),
                                            {'type': 'flag'})

        issue_service.UpdateIssues(cnxn,
                                   update_issues,
                                   update_cols=['is_spam'])

    def FlagComment(self, cnxn, issue_id, comment_id, reported_user_id,
                    reporting_user_id, flagged_spam):
        """Creates or deletes a spam report on a comment."""
        # TODO(seanmccullough): Bulk comment flagging? There's no UI for that.
        if flagged_spam:
            self.report_tbl.InsertRow(cnxn,
                                      ignore=True,
                                      issue_id=issue_id,
                                      comment_id=comment_id,
                                      reported_user_id=reported_user_id,
                                      user_id=reporting_user_id)
            self.comment_actions.increment({'type': 'flag'})
        else:
            self.report_tbl.Delete(cnxn,
                                   issue_id=issue_id,
                                   comment_id=comment_id,
                                   user_id=reporting_user_id)

    def RecordClassifierIssueVerdict(self, cnxn, issue, is_spam, confidence,
                                     fail_open):
        reason = REASON_FAIL_OPEN if fail_open else REASON_CLASSIFIER
        self.verdict_tbl.InsertRow(cnxn,
                                   issue_id=issue.issue_id,
                                   is_spam=is_spam,
                                   reason=reason,
                                   classifier_confidence=confidence,
                                   project_id=issue.project_id)
        if is_spam:
            self.issue_actions.increment({'type': 'classifier'})
        # This is called at issue creation time, so there's nothing else to do here.

    def RecordManualIssueVerdicts(self, cnxn, issue_service, issues, user_id,
                                  is_spam):
        rows = [(user_id, issue.issue_id, is_spam, REASON_MANUAL,
                 issue.project_id) for issue in issues]
        issue_ids = [issue.issue_id for issue in issues]

        # Overrule all previous verdicts.
        self.verdict_tbl.Update(
            cnxn, {'overruled': True},
            [('issue_id IN (%s)' % sql.PlaceHolders(issue_ids), issue_ids)],
            commit=False)

        self.verdict_tbl.InsertRows(cnxn,
                                    MANUALVERDICT_ISSUE_COLS,
                                    rows,
                                    ignore=True)

        for issue in issues:
            issue.is_spam = is_spam

        if is_spam:
            self.issue_actions.increment_by(len(issues), {'type': 'manual'})
        else:
            issue_service.AllocateNewLocalIDs(cnxn, issues)

        # This will commit the transaction.
        issue_service.UpdateIssues(cnxn, issues, update_cols=['is_spam'])

    def RecordManualCommentVerdict(self, cnxn, issue_service, user_service,
                                   comment_id, sequence_num, user_id, is_spam):
        # TODO(seanmccullough): Bulk comment verdicts? There's no UI for that.
        self.verdict_tbl.InsertRow(cnxn,
                                   ignore=True,
                                   user_id=user_id,
                                   comment_id=comment_id,
                                   is_spam=is_spam,
                                   reason=REASON_MANUAL)
        comment = issue_service.GetComment(cnxn, comment_id)
        comment.is_spam = is_spam
        issue = issue_service.GetIssue(cnxn, comment.issue_id)
        issue_service.SoftDeleteComment(cnxn, comment.project_id,
                                        issue.local_id, sequence_num, user_id,
                                        user_service, is_spam, True, is_spam)
        if is_spam:
            self.comment_actions.increment({'type': 'manual'})

    def RecordClassifierCommentVerdict(self, cnxn, comment, is_spam,
                                       confidence, fail_open):
        reason = REASON_FAIL_OPEN if fail_open else REASON_CLASSIFIER
        self.verdict_tbl.InsertRow(cnxn,
                                   comment_id=comment.id,
                                   is_spam=is_spam,
                                   reason=reason,
                                   classifier_confidence=confidence,
                                   project_id=comment.project_id)
        if is_spam:
            self.comment_actions.increment({'type': 'classifier'})

    def _predict(self, body):
        return self.prediction_service.trainedmodels().predict(
            project=settings.classifier_project_id,
            id=settings.classifier_model_id,
            body=body).execute()

    def _IsExempt(self, author, is_project_member):
        """Return True if the user is exempt from spam checking."""
        if author.email is not None and author.email.endswith(
                settings.spam_whitelisted_suffixes):
            logging.info('%s whitelisted from spam filtering', author.email)
            return True

        if author.ignore_action_limits:
            logging.info('%s trusted not to spam', author.email)
            return True

        if is_project_member:
            logging.info('%s is a project member, assuming ham', author.email)
            return True

        return False

    def ClassifyIssue(self, issue, firstComment, reporter, is_project_member):
        """Classify an issue as either spam or ham.

    Args:
      issue: the Issue.
      firstComment: the first Comment on issue.
      reporter: User PB for the Issue reporter.
      is_project_member: True if reporter is a member of issue's project.

    Returns a JSON dict of classifier prediction results from
    the Cloud Prediction API.
    """
        # Fail-safe: not spam.
        result = {
            'outputLabel': 'ham',
            'outputMulti': [{
                'label': 'ham',
                'score': '1.0'
            }],
            'failed_open': False
        }

        if self._IsExempt(reporter, is_project_member):
            return result

        if not self.prediction_service:
            logging.error("prediction_service not initialized.")
            return result

        features = spam_helpers.GenerateFeatures(issue.summary,
                                                 firstComment.content,
                                                 settings.spam_feature_hashes)

        remaining_retries = 3
        while remaining_retries > 0:
            try:
                result = self._predict({'input': {
                    'csvInstance': features,
                }})
                result['failed_open'] = False
                return result
            except Exception as ex:
                remaining_retries = remaining_retries - 1
                self.prediction_api_failures.increment()
                logging.error('Error calling prediction API: %s' % ex)

            result['failed_open'] = True
        return result

    def ClassifyComment(self,
                        comment_content,
                        commenter,
                        is_project_member=True):
        """Classify a comment as either spam or ham.

    Args:
      comment_content: the comment text.
      commenter: User PB for the user who authored the comment.
      is_project_member: True if commenter is a member of the project.

    Returns a JSON dict of classifier prediction results from
    the Cloud Prediction API.
    """
        # Fail-safe: not spam.
        result = {
            'outputLabel': 'ham',
            'outputMulti': [{
                'label': 'ham',
                'score': '1.0'
            }],
            'failed_open': False
        }

        if self._IsExempt(commenter, is_project_member):
            return result

        if not self.prediction_service:
            logging.error("prediction_service not initialized.")
            self.prediction_api_failures.increment()
            result['failed_open'] = True
            return result

        features = spam_helpers.GenerateFeatures('', comment_content,
                                                 settings.spam_feature_hashes)

        remaining_retries = 3
        while remaining_retries > 0:
            try:
                result = self._predict({'input': {
                    'csvInstance': features,
                }})
                result['failed_open'] = False
                return result
            except Exception as ex:
                remaining_retries = remaining_retries - 1
                self.prediction_api_failures.increment()
                logging.error('Error calling prediction API: %s' % ex)

            result['failed_open'] = True
        return result

    def GetIssueClassifierQueue(self,
                                cnxn,
                                _issue_service,
                                project_id,
                                offset=0,
                                limit=10):
        """Returns list of recent issues with spam verdicts,
     ranked in ascending order of confidence (so uncertain items are first).
     """
        # TODO(seanmccullough): Optimize pagination. This query probably gets
        # slower as the number of SpamVerdicts grows, regardless of offset
        # and limit values used here.  Using offset,limit in general may not
        # be the best way to do this.
        issue_results = self.verdict_tbl.Select(
            cnxn,
            cols=[
                'issue_id', 'is_spam', 'reason', 'classifier_confidence',
                'created'
            ],
            where=[
                ('project_id = %s', [project_id]),
                ('classifier_confidence <= %s',
                 [settings.classifier_moderation_thresh]),
                ('overruled = %s', [False]),
                ('issue_id IS NOT NULL', []),
            ],
            order_by=[
                ('classifier_confidence ASC', []),
                ('created ASC', []),
            ],
            group_by=['issue_id'],
            offset=offset,
            limit=limit,
        )

        ret = []
        for row in issue_results:
            ret.append(
                ModerationItem(
                    issue_id=long(row[0]),
                    is_spam=row[1] == 1,
                    reason=row[2],
                    classifier_confidence=row[3],
                    verdict_time='%s' % row[4],
                ))

        count = self.verdict_tbl.SelectValue(
            cnxn,
            col='COUNT(*)',
            where=[
                ('project_id = %s', [project_id]),
                ('classifier_confidence <= %s',
                 [settings.classifier_moderation_thresh]),
                ('overruled = %s', [False]),
                ('issue_id IS NOT NULL', []),
            ])

        return ret, count

    def GetIssueFlagQueue(self,
                          cnxn,
                          _issue_service,
                          project_id,
                          offset=0,
                          limit=10):
        """Returns list of recent issues that have been flagged by users"""
        issue_flags = self.report_tbl.Select(
            cnxn,
            cols=[
                "Issue.project_id", "Report.issue_id", "count(*) as count",
                "max(Report.created) as latest",
                "count(distinct Report.user_id) as users"
            ],
            left_joins=["Issue ON Issue.id = Report.issue_id"],
            where=[('Report.issue_id IS NOT NULL', []),
                   ("Issue.project_id == %v", [project_id])],
            order_by=[('count DESC', [])],
            group_by=['Report.issue_id'],
            offset=offset,
            limit=limit)
        ret = []
        for row in issue_flags:
            ret.append(
                ModerationItem(
                    project_id=row[0],
                    issue_id=row[1],
                    count=row[2],
                    latest_report=row[3],
                    num_users=row[4],
                ))

        count = self.report_tbl.SelectValue(
            cnxn,
            col='COUNT(DISTINCT Report.issue_id)',
            where=[('Issue.project_id = %s', [project_id])],
            left_joins=["Issue ON Issue.id = Report.issue_id"])
        return ret, count

    def GetCommentClassifierQueue(self,
                                  cnxn,
                                  _issue_service,
                                  project_id,
                                  offset=0,
                                  limit=10):
        """Returns list of recent comments with spam verdicts,
     ranked in ascending order of confidence (so uncertain items are first).
     """
        # TODO(seanmccullough): Optimize pagination. This query probably gets
        # slower as the number of SpamVerdicts grows, regardless of offset
        # and limit values used here.  Using offset,limit in general may not
        # be the best way to do this.
        comment_results = self.verdict_tbl.Select(
            cnxn,
            cols=[
                'issue_id', 'is_spam', 'reason', 'classifier_confidence',
                'created'
            ],
            where=[
                ('project_id = %s', [project_id]),
                ('classifier_confidence <= %s',
                 [settings.classifier_moderation_thresh]),
                ('overruled = %s', [False]),
                ('comment_id IS NOT NULL', []),
            ],
            order_by=[
                ('classifier_confidence ASC', []),
                ('created ASC', []),
            ],
            group_by=['comment_id'],
            offset=offset,
            limit=limit,
        )

        ret = []
        for row in comment_results:
            ret.append(
                ModerationItem(
                    comment_id=long(row[0]),
                    is_spam=row[1] == 1,
                    reason=row[2],
                    classifier_confidence=row[3],
                    verdict_time='%s' % row[4],
                ))

        count = self.verdict_tbl.SelectValue(
            cnxn,
            col='COUNT(*)',
            where=[
                ('project_id = %s', [project_id]),
                ('classifier_confidence <= %s',
                 [settings.classifier_moderation_thresh]),
                ('overruled = %s', [False]),
                ('comment_id IS NOT NULL', []),
            ])

        return ret, count

    def GetTrainingIssues(self,
                          cnxn,
                          issue_service,
                          since,
                          offset=0,
                          limit=100):
        """Returns list of recent issues with human-labeled spam/ham verdicts.
    """

        # Get all manual verdicts created since |since|.
        results = self.verdict_tbl.Select(
            cnxn,
            cols=['issue_id'],
            where=[
                ('overruled = %s', [False]),
                ('reason = %s', ['manual']),
                ('issue_id IS NOT NULL', []),
                ('created > %s', [since.isoformat()]),
            ],
            offset=offset,
            limit=limit,
        )

        issue_ids = [long(row[0]) for row in results if row[0]]
        issues = issue_service.GetIssues(cnxn, issue_ids)
        comments = issue_service.GetCommentsForIssues(cnxn, issue_ids)
        first_comments = {}
        for issue in issues:
            first_comments[issue.issue_id] = (
                comments[issue.issue_id][0].content
                if issue.issue_id in comments else "[Empty]")

        count = self.verdict_tbl.SelectValue(cnxn,
                                             col='COUNT(*)',
                                             where=[
                                                 ('overruled = %s', [False]),
                                                 ('reason = %s', ['manual']),
                                                 ('issue_id IS NOT NULL', []),
                                                 ('created > %s',
                                                  [since.isoformat()]),
                                             ])

        return issues, first_comments, count

    def GetTrainingComments(self,
                            cnxn,
                            issue_service,
                            since,
                            offset=0,
                            limit=100):
        """Returns list of recent comments with human-labeled spam/ham verdicts.
    """

        # Get all manual verdicts created since |since|.
        results = self.verdict_tbl.Select(
            cnxn,
            cols=['comment_id'],
            where=[
                ('overruled = %s', [False]),
                ('reason = %s', ['manual']),
                ('comment_id IS NOT NULL', []),
                ('created > %s', [since.isoformat()]),
            ],
            offset=offset,
            limit=limit,
        )

        comment_ids = [long(row[0]) for row in results if row[0]]
        # Don't care about sequence numbers in this context yet.
        comments = issue_service.GetCommentsByID(cnxn, comment_ids,
                                                 defaultdict(int))

        count = self.verdict_tbl.SelectValue(cnxn,
                                             col='COUNT(*)',
                                             where=[
                                                 ('overruled = %s', [False]),
                                                 ('reason = %s', ['manual']),
                                                 ('comment_id IS NOT NULL',
                                                  []),
                                                 ('created > %s',
                                                  [since.isoformat()]),
                                             ])

        return comments, count
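
A standalone sketch of the fail-open retry pattern used by ClassifyIssue and ClassifyComment above: try the classifier a bounded number of times and, on exhaustion, return the safe 'ham' verdict with failed_open set. The predict stub is illustrative.

HAM_RESULT = {
    'outputLabel': 'ham',
    'outputMulti': [{'label': 'ham', 'score': '1.0'}],
}

def classify_with_fail_open(predict, retries=3):
    for _ in range(retries):
        try:
            result = dict(predict())
            result['failed_open'] = False
            return result
        except Exception:
            # A real caller would log and bump a failure metric here.
            pass
    result = dict(HAM_RESULT)
    result['failed_open'] = True
    return result

print(classify_with_fail_open(lambda: {'outputLabel': 'spam'}))
# {'outputLabel': 'spam', 'failed_open': False}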
Exemplo n.º 26
0
class SpamService(object):
  """The persistence layer for spam reports."""
  issue_actions = ts_mon.CounterMetric(
      'monorail/spam_svc/issue',
      'Count of things that happen to issues.',
      [ts_mon.StringField('type')])
  comment_actions = ts_mon.CounterMetric(
      'monorail/spam_svc/comment',
      'Count of things that happen to comments.',
      [ts_mon.StringField('type')])
  ml_engine_failures = ts_mon.CounterMetric(
      'monorail/spam_svc/ml_engine_failure',
      'Failures calling the ML Engine API',
      None)

  def __init__(self):
    self.report_tbl = sql.SQLTableManager(SPAMREPORT_TABLE_NAME)
    self.verdict_tbl = sql.SQLTableManager(SPAMVERDICT_TABLE_NAME)
    self.issue_tbl = sql.SQLTableManager(ISSUE_TABLE)

    # ML Engine library is lazy loaded below.
    self.ml_engine = None

  def LookupIssuesFlaggers(self, cnxn, issue_ids):
    """Returns users who've reported the issues or their comments as spam.

    Returns a dictionary {issue_id: (issue_reporters, comment_reporters)}.
    issue_reporters is a list of users who flagged the issue;
    comment_reporters is a dictionary {comment_id: [user_ids]} where user_ids
    are the users who flagged that comment.
    """
    rows = self.report_tbl.Select(
        cnxn, cols=['issue_id', 'user_id', 'comment_id'],
        issue_id=issue_ids)

    reporters = collections.defaultdict(
        # Return a tuple of (issue_reporters, comment_reporters) as described
        # above.
        lambda: ([], collections.defaultdict(list)))

    for row in rows:
      issue_id = int(row[0])
      user_id = row[1]
      if row[2]:
        comment_id = row[2]
        reporters[issue_id][1][comment_id].append(user_id)
      else:
        reporters[issue_id][0].append(user_id)

    return reporters

  def LookupIssueFlaggers(self, cnxn, issue_id):
    """Returns users who've reported the issue or its comments as spam.

    Returns a tuple. First element is a list of users who flagged the issue;
    second element is a dictionary of comment id to a list of users who flagged
    that comment.
    """
    return self.LookupIssuesFlaggers(cnxn, [issue_id])[issue_id]

  def LookupIssueFlagCounts(self, cnxn, issue_ids):
    """Returns a map of issue_id to flag counts"""
    rows = self.report_tbl.Select(cnxn, cols=['issue_id', 'COUNT(*)'],
                                  issue_id=issue_ids, group_by=['issue_id'])
    counts = {}
    for row in rows:
      counts[int(row[0])] = row[1]
    return counts

  def LookupIssueVerdicts(self, cnxn, issue_ids):
    """Returns a map of issue_id to most recent spam verdicts"""
    rows = self.verdict_tbl.Select(cnxn,
                                   cols=['issue_id', 'reason', 'MAX(created)'],
                                   issue_id=issue_ids, comment_id=None,
                                   group_by=['issue_id'])
    reasons = {}
    for row in rows:
      reasons[int(row[0])] = row[1]
    return reasons

  def LookupIssueVerdictHistory(self, cnxn, issue_ids):
    """Returns a map of issue_id to most recent spam verdicts"""
    rows = self.verdict_tbl.Select(cnxn, cols=[
        'issue_id', 'reason', 'created', 'is_spam', 'classifier_confidence',
            'user_id', 'overruled'],
        issue_id=issue_ids, order_by=[('issue_id', []), ('created', [])])

    # TODO: group by issue_id, make class instead of dict for verdict.
    verdicts = []
    for row in rows:
      verdicts.append({
        'issue_id': row[0],
        'reason': row[1],
        'created': row[2],
        'is_spam': row[3],
        'classifier_confidence': row[4],
        'user_id': row[5],
        'overruled': row[6],
      })

    return verdicts

  def LookupCommentVerdictHistory(self, cnxn, comment_ids):
    """Returns a map of issue_id to most recent spam verdicts"""
    rows = self.verdict_tbl.Select(cnxn, cols=[
        'comment_id', 'reason', 'created', 'is_spam', 'classifier_confidence',
            'user_id', 'overruled'],
        comment_id=comment_ids, order_by=[('comment_id', []), ('created', [])])

    # TODO: group by comment_id, make class instead of dict for verdict.
    verdicts = []
    for row in rows:
      verdicts.append({
        'comment_id': row[0],
        'reason': row[1],
        'created': row[2],
        'is_spam': row[3],
        'classifier_confidence': row[4],
        'user_id': row[5],
        'overruled': row[6],
      })

    return verdicts

  def FlagIssues(self, cnxn, issue_service, issues, reporting_user_id,
                 flagged_spam):
    """Creates or deletes a spam report on an issue."""
    verdict_updates = []
    if flagged_spam:
      rows = [(issue.issue_id, issue.reporter_id, reporting_user_id)
          for issue in issues]
      self.report_tbl.InsertRows(cnxn, SPAMREPORT_ISSUE_COLS, rows,
          ignore=True)
    else:
      issue_ids = [issue.issue_id for issue in issues]
      self.report_tbl.Delete(
          cnxn, issue_id=issue_ids, user_id=reporting_user_id,
          comment_id=None)

    project_id = issues[0].project_id

    # Now record new verdicts and update issue.is_spam, if they've changed.
    ids = [issue.issue_id for issue in issues]
    counts = self.LookupIssueFlagCounts(cnxn, ids)
    previous_verdicts = self.LookupIssueVerdicts(cnxn, ids)

    for issue_id in counts:
      # If the flag counts changed enough to toggle the is_spam bit, need to
      # record a new verdict and update the Issue.

      # No number of user spam flags can overturn an admin's verdict.
      if previous_verdicts.get(issue_id) == REASON_MANUAL:
        continue

      # If enough spam flags come in, mark the issue as spam.
      if (flagged_spam and counts[issue_id] >= settings.spam_flag_thresh):
        verdict_updates.append(issue_id)

    if len(verdict_updates) == 0:
      return

    # Some of the issues may have exceeded the flag threshold, so issue
    # verdicts and mark them as spam in those cases.
    rows = [(issue_id, flagged_spam, REASON_THRESHOLD, project_id)
        for issue_id in verdict_updates]
    self.verdict_tbl.InsertRows(cnxn, THRESHVERDICT_ISSUE_COLS, rows,
        ignore=True)
    update_issues = []
    for issue in issues:
      if issue.issue_id in verdict_updates:
        issue.is_spam = flagged_spam
        update_issues.append(issue)

    if flagged_spam:
      self.issue_actions.increment_by(len(update_issues), {'type': 'flag'})

    issue_service.UpdateIssues(cnxn, update_issues, update_cols=['is_spam'])

  def FlagComment(self, cnxn, issue_id, comment_id, reported_user_id,
                  reporting_user_id, flagged_spam):
    """Creates or deletes a spam report on a comment."""
    # TODO(seanmccullough): Bulk comment flagging? There's no UI for that.
    if flagged_spam:
      self.report_tbl.InsertRow(
          cnxn, ignore=True, issue_id=issue_id,
          comment_id=comment_id, reported_user_id=reported_user_id,
          user_id=reporting_user_id)
      self.comment_actions.increment({'type': 'flag'})
    else:
      self.report_tbl.Delete(
          cnxn, issue_id=issue_id, comment_id=comment_id,
          user_id=reporting_user_id)

  def RecordClassifierIssueVerdict(self, cnxn, issue, is_spam, confidence,
        fail_open):
    """Records a classifier verdict for a newly created issue."""
    reason = REASON_FAIL_OPEN if fail_open else REASON_CLASSIFIER
    self.verdict_tbl.InsertRow(cnxn, issue_id=issue.issue_id, is_spam=is_spam,
        reason=reason, classifier_confidence=confidence,
        project_id=issue.project_id)
    if is_spam:
      self.issue_actions.increment({'type': 'classifier'})
    # This is called at issue creation time, so there's nothing else to do here.

  def RecordManualIssueVerdicts(self, cnxn, issue_service, issues, user_id,
                                is_spam):
    """Records manual verdicts on issues, overruling all prior verdicts."""
    rows = [(user_id, issue.issue_id, is_spam, REASON_MANUAL, issue.project_id)
        for issue in issues]
    issue_ids = [issue.issue_id for issue in issues]

    # Overrule all previous verdicts.
    self.verdict_tbl.Update(cnxn, {'overruled': True}, [
        ('issue_id IN (%s)' % sql.PlaceHolders(issue_ids), issue_ids)
        ], commit=False)

    self.verdict_tbl.InsertRows(cnxn, MANUALVERDICT_ISSUE_COLS, rows,
        ignore=True)

    for issue in issues:
      issue.is_spam = is_spam

    if is_spam:
      self.issue_actions.increment_by(len(issues), {'type': 'manual'})
    else:
      # Issues un-marked as spam need new local IDs so they show up again in
      # the project.
      issue_service.AllocateNewLocalIDs(cnxn, issues)

    # This will commit the transaction.
    issue_service.UpdateIssues(cnxn, issues, update_cols=['is_spam'])
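  # Hypothetical call sketch (the service handles and issue object are assumed
  # to be already loaded); note that UpdateIssues commits the transaction:
  #
  #   spam_service.RecordManualIssueVerdicts(
  #       cnxn, issue_service, [issue], admin_user_id, is_spam=True)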

  def RecordManualCommentVerdict(self, cnxn, issue_service, user_service,
        comment_id, user_id, is_spam):
    """Records a manual spam/ham verdict on a single comment."""
    # TODO(seanmccullough): Bulk comment verdicts? There's no UI for that.
    self.verdict_tbl.InsertRow(cnxn, ignore=True,
      user_id=user_id, comment_id=comment_id, is_spam=is_spam,
      reason=REASON_MANUAL)
    comment = issue_service.GetComment(cnxn, comment_id)
    comment.is_spam = is_spam
    issue = issue_service.GetIssue(cnxn, comment.issue_id, use_cache=False)
    issue_service.SoftDeleteComment(
        cnxn, issue, comment, user_id, user_service, is_spam, True, is_spam)
    if is_spam:
      self.comment_actions.increment({'type': 'manual'})

  def RecordClassifierCommentVerdict(self, cnxn, comment, is_spam, confidence,
      fail_open):
    """Records a classifier verdict for a newly posted comment."""
    reason = REASON_FAIL_OPEN if fail_open else REASON_CLASSIFIER
    self.verdict_tbl.InsertRow(cnxn, comment_id=comment.id, is_spam=is_spam,
        reason=reason, classifier_confidence=confidence,
        project_id=comment.project_id)
    if is_spam:
      self.comment_actions.increment({'type': 'classifier'})

  def _predict(self, instance):
    """Requests a prediction from the ML Engine API.

    Sample API response:
      {'predictions': [{
        'classes': ['0', '1'],
        'scores': [0.4986788034439087, 0.5013211965560913]
      }]}

    This hits the default model.

    Returns:
      A floating point number representing the confidence
      the instance is spam.
    """
    model_name = 'projects/%s/models/%s' % (
      settings.classifier_project_id, settings.spam_model_name)
    body = {'instances': [{'inputs': instance['word_hashes']}]}

    if not self.ml_engine:
      self.ml_engine = ml_helpers.setup_ml_engine()

    request = self.ml_engine.projects().predict(name=model_name, body=body)
    response = request.execute()
    logging.info('ML Engine API response: %r', response)
    prediction = response['predictions'][0]

    # Ensure the confidence we return is for the spam label, not the ham
    # label. The spam label, '1', is usually at index 1, but the API makes
    # no guarantee about label order, so check both positions.
    if prediction['classes'][1] == SPAM_CLASS_LABEL:
      return prediction['scores'][1]
    elif prediction['classes'][0] == SPAM_CLASS_LABEL:
      return prediction['scores'][0]
    else:
      raise Exception('No predicted classes found.')
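  # Worked example of the parsing above, using the sample response from the
  # docstring: prediction['classes'] == ['0', '1'], so the spam label '1' is
  # found at index 1 and _predict returns prediction['scores'][1], i.e.
  # 0.5013211965560913.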

  def _IsExempt(self, author, is_project_member):
    """Return True if the user is exempt from spam checking."""
    if author.email is not None and author.email.endswith(
        settings.spam_whitelisted_suffixes):
      logging.info('%s whitelisted from spam filtering', author.email)
      return True

    if is_project_member:
      logging.info('%s is a project member, assuming ham', author.email)
      return True

    return False
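  # Note: str.endswith accepts a tuple of suffixes, so the check above works
  # if, for example (hypothetical value), settings.spam_whitelisted_suffixes
  # is ('@chromium.org', '@google.com'):
  #
  #   'dev@chromium.org'.endswith(('@chromium.org', '@google.com'))  # True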

  def ClassifyIssue(self, issue, firstComment, reporter, is_project_member):
    """Classify an issue as either spam or ham.

    Args:
      issue: the Issue.
      firstComment: the first Comment on issue.
      reporter: User PB for the Issue reporter.
      is_project_member: True if reporter is a member of issue's project.

    Returns a JSON dict of classifier prediction results from
    the ML Engine API.
    """
    instance = ml_helpers.GenerateFeaturesRaw(
        [issue.summary, firstComment.content],
        settings.spam_feature_hashes)
    return self._classify(instance, reporter, is_project_member)

  def ClassifyComment(self, comment_content, commenter, is_project_member=True):
    """Classify a comment as either spam or ham.

    Args:
      comment: the comment text.
      commenter: User PB for the user who authored the comment.

    Returns a JSON dict of classifier prediction results from
    the ML Engine API.
    """
    instance = ml_helpers.GenerateFeaturesRaw(
        ['', comment_content],
        settings.spam_feature_hashes)
    return self._classify(instance, commenter, is_project_member)


  def _classify(self, instance, author, is_project_member):
    """Returns a classifier result dict, failing open to ham on any error."""
    # Fail-safe: not spam.
    result = self.ham_classification()

    if self._IsExempt(author, is_project_member):
      return result

    if not self.ml_engine:
      self.ml_engine = ml_helpers.setup_ml_engine()

    # If setup_ml_engine returns None, it failed to init.
    if not self.ml_engine:
      logging.error("ML Engine not initialized.")
      self.ml_engine_failures.increment()
      result['failed_open'] = True
      return result

    remaining_retries = 3
    while remaining_retries > 0:
      try:
        result['confidence_is_spam'] = self._predict(instance)
        result['failed_open'] = False
        return result
      except Exception as ex:
        remaining_retries -= 1
        self.ml_engine_failures.increment()
        logging.error('Error calling ML Engine API: %s', ex)

    # All retries failed: fail open (treat as ham) rather than block the user.
    result['failed_open'] = True
    return result
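  # The result dict always has the same shape. A few illustrative outcomes
  # (the confidence values are examples, not guarantees):
  #   exempt author:      {'confidence_is_spam': 0.0,  'failed_open': False}
  #   successful predict: {'confidence_is_spam': 0.97, 'failed_open': False}
  #   3 failed retries:   {'confidence_is_spam': 0.0,  'failed_open': True}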

  def ham_classification(self):
    """Returns a default 'ham' result for skipped or failed classifications."""
    return {'confidence_is_spam': 0.0,
            'failed_open': False}

  def GetIssueClassifierQueue(
      self, cnxn, _issue_service, project_id, offset=0, limit=10):
     """Returns list of recent issues with spam verdicts,
     ranked in ascending order of confidence (so uncertain items are first).
     """
     # TODO(seanmccullough): Optimize pagination. This query probably gets
     # slower as the number of SpamVerdicts grows, regardless of offset
     # and limit values used here.  Using offset,limit in general may not
     # be the best way to do this.
     issue_results = self.verdict_tbl.Select(cnxn,
         cols=['issue_id', 'is_spam', 'reason', 'classifier_confidence',
               'created'],
         where=[
             ('project_id = %s', [project_id]),
             ('classifier_confidence <= %s',
                 [settings.classifier_moderation_thresh]),
             ('overruled = %s', [False]),
             ('issue_id IS NOT NULL', []),
         ],
         order_by=[
             ('classifier_confidence ASC', []),
             ('created ASC', []),
             ],
         group_by=['issue_id'],
         offset=offset,
         limit=limit,
         )

     ret = []
     for row in issue_results:
       ret.append(ModerationItem(
         issue_id=int(row[0]),
         is_spam=row[1] == 1,
         reason=row[2],
         classifier_confidence=row[3],
         verdict_time='%s' % row[4],
       ))

     count = self.verdict_tbl.SelectValue(cnxn,
         col='COUNT(*)',
         where=[
             ('project_id = %s', [project_id]),
             ('classifier_confidence <= %s',
                 [settings.classifier_moderation_thresh]),
             ('overruled = %s', [False]),
             ('issue_id IS NOT NULL', []),
         ])

     return ret, count
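  # Hypothetical pagination sketch: page through the moderation queue ten
  # items at a time (ModerationItem fields follow the constructor above):
  #
  #   items, total = spam_service.GetIssueClassifierQueue(
  #       cnxn, issue_service, project_id, offset=0, limit=10)
  #   for item in items:
  #     print item.issue_id, item.classifier_confidence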

  def GetIssueFlagQueue(
      self, cnxn, _issue_service, project_id, offset=0, limit=10):
     """Returns list of recent issues that have been flagged by users"""
     issue_flags = self.report_tbl.Select(cnxn,
         cols = ["Issue.project_id", "Report.issue_id", "count(*) as count",
                 "max(Report.created) as latest",
                 "count(distinct Report.user_id) as users"],
         left_joins=["Issue ON Issue.id = Report.issue_id"],
         where=[('Report.issue_id IS NOT NULL', []),
                ("Issue.project_id == %v", [project_id])],
         order_by=[('count DESC', [])],
         group_by=['Report.issue_id'],
         offset=offset, limit=limit)
     ret = []
     for row in issue_flags:
       ret.append(ModerationItem(
         project_id=row[0],
         issue_id=row[1],
         count=row[2],
         latest_report=row[3],
         num_users=row[4],
       ))

     count = self.verdict_tbl.SelectValue(cnxn,
         col='COUNT(DISTINCT Report.issue_id)',
         where=[('Issue.project_id = %s', [project_id])],
         left_joins=["Issue ON Issue.id = SpamReport.issue_id"])
     return ret, count


  def GetCommentClassifierQueue(
      self, cnxn, _issue_service, project_id, offset=0, limit=10):
     """Returns list of recent comments with spam verdicts,
     ranked in ascending order of confidence (so uncertain items are first).
     """
     # TODO(seanmccullough): Optimize pagination. This query probably gets
     # slower as the number of SpamVerdicts grows, regardless of offset
     # and limit values used here.  Using offset,limit in general may not
     # be the best way to do this.
     comment_results = self.verdict_tbl.Select(cnxn,
         cols=['issue_id', 'is_spam', 'reason', 'classifier_confidence',
               'created'],
         where=[
             ('project_id = %s', [project_id]),
             ('classifier_confidence <= %s',
                 [settings.classifier_moderation_thresh]),
             ('overruled = %s', [False]),
             ('comment_id IS NOT NULL', []),
         ],
         order_by=[
             ('classifier_confidence ASC', []),
             ('created ASC', []),
             ],
         group_by=['comment_id'],
         offset=offset,
         limit=limit,
         )

     ret = []
     for row in comment_results:
       ret.append(ModerationItem(
         comment_id=int(row[0]),
         is_spam=row[1] == 1,
         reason=row[2],
         classifier_confidence=row[3],
         verdict_time='%s' % row[4],
       ))

     count = self.verdict_tbl.SelectValue(cnxn,
         col='COUNT(*)',
         where=[
             ('project_id = %s', [project_id]),
             ('classifier_confidence <= %s',
                 [settings.classifier_moderation_thresh]),
             ('overruled = %s', [False]),
             ('comment_id IS NOT NULL', []),
         ])

     return ret, count


  def GetTrainingIssues(self, cnxn, issue_service, since, offset=0, limit=100):
    """Returns list of recent issues with human-labeled spam/ham verdicts.
    """

    # Get all manual verdicts recorded since the given time.
    results = self.verdict_tbl.Select(cnxn,
        cols=['issue_id'],
        where=[
            ('overruled = %s', [False]),
            ('reason = %s', ['manual']),
            ('issue_id IS NOT NULL', []),
            ('created > %s', [since.isoformat()]),
        ],
        offset=offset,
        limit=limit,
        )

    issue_ids = [int(row[0]) for row in results if row[0]]
    issues = issue_service.GetIssues(cnxn, issue_ids)
    comments = issue_service.GetCommentsForIssues(cnxn, issue_ids)
    first_comments = {}
    for issue in issues:
      first_comments[issue.issue_id] = (comments[issue.issue_id][0].content
          if issue.issue_id in comments else "[Empty]")

    count = self.verdict_tbl.SelectValue(cnxn,
        col='COUNT(*)',
        where=[
            ('overruled = %s', [False]),
            ('reason = %s', ['manual']),
            ('issue_id IS NOT NULL', []),
            ('created > %s', [since.isoformat()]),
        ])

    return issues, first_comments, count
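  # Hypothetical training-export sketch: pull the last day of human-labeled
  # issues (the one-day window is an example, not a requirement):
  #
  #   since = datetime.datetime.utcnow() - datetime.timedelta(days=1)
  #   issues, first_comments, count = spam_service.GetTrainingIssues(
  #       cnxn, issue_service, since, offset=0, limit=100)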

  def GetTrainingComments(self, cnxn, issue_service, since, offset=0,
      limit=100):
    """Returns list of recent comments with human-labeled spam/ham verdicts.
    """

    # Get all manual comment verdicts recorded since the given time.
    results = self.verdict_tbl.Select(
        cnxn,
        distinct=True,
        cols=['comment_id'],
        where=[
            ('overruled = %s', [False]),
            ('reason = %s', ['manual']),
            ('comment_id IS NOT NULL', []),
            ('created > %s', [since.isoformat()]),
        ],
        offset=offset,
        limit=limit,
        )

    comment_ids = [int(row[0]) for row in results if row[0]]
    # Sequence numbers don't matter in this context, so pass a defaultdict.
    comments = issue_service.GetCommentsByID(cnxn, comment_ids,
        defaultdict(int))
    return comments

  def ExpungeUsersInSpam(self, cnxn, user_ids):
    """Removes all references to given users from Spam DB tables.

    This method will not commit the operations. This method will
    not make changes to in-memory data.
    """
    commit = False
    self.report_tbl.Delete(cnxn, reported_user_id=user_ids, commit=commit)

    # Anonymize reports and verdicts that were filed by these users.
    delta = {'user_id': framework_constants.DELETED_USER_ID}
    self.report_tbl.Update(cnxn, delta, user_id=user_ids, commit=commit)
    self.verdict_tbl.Update(cnxn, delta, user_id=user_ids, commit=commit)
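  # Because this method never commits, a hypothetical caller would batch it
  # with other expunge operations and commit once at the end, e.g.:
  #
  #   spam_service.ExpungeUsersInSpam(cnxn, user_ids)
  #   # ...other Expunge* calls on the same connection...
  #   cnxn.Commit()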