Example #1
def RunOnce(client, job, callback):
  """Get list of files and call processing function."""
  logs_paths = logs_util.ServerLogsPaths('viewfinder', 'full')
  merged_store = ObjectStore.GetInstance(logs_paths.MERGED_LOGS_BUCKET)

  start_date = options.options.start_date
  if options.options.smart_scan:
    # Search for successful full-scan run in the last week.
    last_run = yield gen.Task(job.FindLastSuccess, with_payload_key='stats.last_day')
    if last_run is None:
      logging.info('No previous successful scan found, rerun with --start_date')
      callback(None)
      return

    last_run_start = last_run['start_time']
    if util.HoursSince(last_run_start) < options.options.hours_between_runs:
      logging.info('Last successful run started at %s, less than %d hours ago; skipping.' %
                   (time.asctime(time.localtime(last_run_start)), options.options.hours_between_runs))
      callback(None)
      return

    last_day = last_run['stats.last_day']
    # Set scan_start to start of previous run - 1d. The extra 1d is in case some logs were pushed to S3 late.
    # This really recomputes two days (the last day that was successfully processed and the one prior).
    start_time = util.ISO8601ToUTCTimestamp(last_day, hour=12) - constants.SECONDS_PER_DAY
    start_date = util.TimestampUTCToISO8601(start_time)
    logging.info('Last successful analyze_logs run (%s) scanned up to %s, setting analysis start date to %s' %
                 (time.asctime(time.localtime(last_run_start)), last_day, start_date))

  # Fetch list of merged logs.
  files = yield gen.Task(GetMergedLogsFileList, merged_store, logs_paths, start_date)
  last_day = yield gen.Task(ProcessFiles, merged_store, logs_paths, files)
  callback(last_day)
  return
Example #2
def _GetMetric(self, day, h=12, m=0, s=0):
    timestamp = util.ISO8601ToUTCTimestamp(day, hour=h, minute=m, second=s)
    existing_metrics = self._RunAsync(metric.Metric.QueryTimespan,
                                      self._client, self._group_key,
                                      timestamp, timestamp)
    if len(existing_metrics) == 0:
        return None
    return DotDict(json.loads(existing_metrics[0].payload))
Example #3
def DayTimeStringsToUTCTimestamp(day, time):
  """Given day (YYYY-MM-DD) and time (HH:MM:SS:ms) strings, return the timestamp in UTC, or None if parsing failed."""
  try:
    hour, minute, second, _ = re.match(kTimeRe, time).groups()
    return util.ISO8601ToUTCTimestamp(day, hour=int(hour), minute=int(minute), second=int(second))
  except Exception as e:
    logging.warning('Error parsing day and time strings: %s %s, error: %r' % (day, time, e))
    return None
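For reference, a minimal sketch of the util date helpers these examples lean on. The names and signatures are taken from the call sites above; the bodies below are assumptions about their behavior, not the original implementation.

import calendar
import datetime
import time


def ISO8601ToUTCTimestamp(day, hour=0, minute=0, second=0):
  """Assumed behavior: parse a YYYY-MM-DD string and return the UTC epoch timestamp at hour:minute:second."""
  dt = datetime.datetime.strptime(day, '%Y-%m-%d').replace(hour=hour, minute=minute, second=second)
  return calendar.timegm(dt.timetuple())


def TimestampUTCToISO8601(timestamp):
  """Assumed behavior: the inverse mapping, dropping the time-of-day part."""
  return time.strftime('%Y-%m-%d', time.gmtime(timestamp))


# Round trip at noon UTC, the convention most of these examples use:
#   ISO8601ToUTCTimestamp('2013-02-01', hour=12)  -> 1359720000
#   TimestampUTCToISO8601(1359720000)             -> '2013-02-01'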
Example #4
def RunOnce(client, job, apple_id, password, callback):
    start_date = yield gen.Task(DetermineStartDate, client, job)
    if start_date is None:
        logging.info(
            'Start date not specified, last run too recent, or smart_scan could not determine a date; exiting.'
        )
        callback(None)
        return

    query_dates = []
    start_time = util.ISO8601ToUTCTimestamp(start_date)
    today = util.NowUTCToISO8601()
    while start_time < time.time():
        date = util.TimestampUTCToISO8601(start_time)
        if date != today:
            query_dates.append(date)
        start_time += constants.SECONDS_PER_DAY

    logging.info('fetching data for dates: %r' % query_dates)
    results = {}
    itc = ITunesTrends(apple_id, password, options.options.vendor_id)
    failed = False
    for day in query_dates:
        if failed:
            break
        try:
            result = yield gen.Task(itc.FetchOneDay, day)
            if not result:
                # We don't get an exception when iTunesConnect has no data. We also don't want to
                # fail as there's no way it will have this data later.
                logging.warning('No data for day %s' % day)
            else:
                results[day] = result
        except Exception:
            msg = traceback.format_exc()
            logging.warning(
                'Error fetching iTunes Connect data for day %s: %s', day, msg)
            failed = True

    if len(results) == 0:
        callback(None)
    else:
        # Replace the entire 'itunes' category of previous metrics. This is so we can fix any processing errors we
        # may have had.
        hms = logs_util.kDailyMetricsTimeByLogType['itunes_trends']
        yield gen.Task(logs_util.UpdateMetrics,
                       client,
                       results,
                       prefix_to_erase='itunes',
                       dry_run=options.options.dry_run,
                       hms_tuple=hms)
        callback(sorted(results.keys())[-1])
Example #5
def RunOnce(client, job, callback):
  """Get list of files and call processing function."""
  merged_store = ObjectStore.GetInstance(logs_util.UserAnalyticsLogsPaths.MERGED_LOGS_BUCKET)

  start_date = options.options.start_date
  if options.options.smart_scan:
    # Search for successful full-scan run in the last week.
    last_run = yield gen.Task(job.FindLastSuccess, with_payload_key='stats.last_day')
    if last_run is None:
      logging.info('No previous successful scan found, rerun with --start_date')
      callback(None)
      return

    last_run_start = last_run['start_time']
    if util.HoursSince(last_run_start) < options.options.hours_between_runs:
      logging.info('Last successful run started at %s, less than %d hours ago; skipping.' %
                   (time.asctime(time.localtime(last_run_start)), options.options.hours_between_runs))
      callback(None)
      return

    last_day = last_run['stats.last_day']
    # Set scan_start to start of previous run - 30d (we need 30 days' worth of data to properly compute
    # 30-day active users). Add an extra 3 days just in case we had some missing logs during the last run.
    start_time = util.ISO8601ToUTCTimestamp(last_day, hour=12) - constants.SECONDS_PER_WEEK
    start_date = util.TimestampUTCToISO8601(start_time)
    logging.info('Last successful analyze_analytics run (%s) scanned up to %s, setting analysis start date to %s' %
                 (time.asctime(time.localtime(last_run_start)), last_day, start_date))

  # Fetch list of merged logs.
  files = yield gen.Task(GetMergedLogsFileList, merged_store, start_date)
  day_stats = yield gen.Task(ProcessFiles, merged_store, files)

  # Write per-day stats to dynamodb.
  if len(day_stats) > 0:
    hms = logs_util.kDailyMetricsTimeByLogType['analytics_logs']
    yield gen.Task(logs_util.UpdateMetrics, client, day_stats, dry_run=options.options.dry_run, hms_tuple=hms)
    last_day = sorted(day_stats.keys())[-1]
    callback(last_day)
  else:
    callback(None)
Example #6
def DetermineStartDate(client, job, callback):
    """If smart_scan is true, lookup the start date from previous job summaries, otherwise use --start_date.
  --start_date and job summary days are of the form YYYY-MM-DD.
  """
    start_date = options.options.start_date

    # Lookup previous runs started in the last week.
    if options.options.smart_scan:
        # Search for successful full-scan run in the last week.
        last_run = yield gen.Task(job.FindLastSuccess,
                                  with_payload_key='stats.last_day')
        if last_run is None:
            logging.info(
                'No previous successful scan found, rerun with --start_date')
            callback(None)
            return

        last_run_start = last_run['start_time']
        if (last_run_start +
                options.options.hours_between_runs * constants.SECONDS_PER_HOUR
                > time.time()):
            logging.info(
                'Last successful run started at %s, less than %d hours ago; skipping.'
                % (time.asctime(time.localtime(last_run_start)),
                   options.options.hours_between_runs))
            callback(None)
            return

        # Set start_date to the last processed day + 1.
        last_day = last_run['stats.last_day']
        start_time = util.ISO8601ToUTCTimestamp(
            last_day) + constants.SECONDS_PER_DAY
        start_date = util.TimestampUTCToISO8601(start_time)
        logging.info(
            'Last successful run (%s) scanned up to %s, setting start date to %s'
            % (time.asctime(
                time.localtime(last_run_start)), last_day, start_date))

    callback(start_date)
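A quick worked check of the date arithmetic above, using hypothetical values; constants.SECONDS_PER_DAY is assumed to be 86400, as the name suggests.

import calendar
import datetime
import time

SECONDS_PER_DAY = 86400  # assumed value of constants.SECONDS_PER_DAY

last_day = '2013-02-01'  # hypothetical 'stats.last_day' from the previous successful run
last_ts = calendar.timegm(datetime.datetime.strptime(last_day, '%Y-%m-%d').timetuple())
start_date = time.strftime('%Y-%m-%d', time.gmtime(last_ts + SECONDS_PER_DAY))
assert start_date == '2013-02-02'  # the scan resumes on the day after the last processed one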
Example #7
def _WriteMetric(self, day, dotdict):
    new_metric = metric.Metric.Create(
        self._group_key, 'logs_daily',
        util.ISO8601ToUTCTimestamp(day, hour=12), json.dumps(dotdict))
    self._RunAsync(new_metric.Update, self._client)
Example #8
def UpdateMetrics(db_client, day_stats, callback, dry_run=True, prefix_to_erase=None, hms_tuple=None):
  """Write 'day_stats' to the metrics table. First lookup any existing metrics and update them.
  'day_stats' is a dictionary of {day_in_iso8601: DotDict}.
  If 'dry_run' is True, don't commit the changes to the metrics table, but perform all the work and log to info.
  If 'prefix_to_erase' is not None, we first replace the passed-in prefix with an empty dotdict.
  If 'hms_tuple' is not None, the timestamp for the metric entry will be with the specified hour/minute/second,
  otherwise, we use noon. To help with consistency, hms_tuple should come from kDailyMetricsTimeByLogType above.

  For example, given the existing metric: { itunes: { downloads: { 'US': 5, 'UK': 3 }, update: { ... }}}
  We can either:
    - Replace the downloads numbers (the entire tree under 'prefix_to_erase' gets replaced):
      UpdateMetrics({'2013-02-01': {'itunes': {'downloads': { 'DE': 3, 'FR': 1 }}}}, prefix_to_erase='itunes.downloads')
      resulting in: { itunes: { downloads: { 'DE': 3, 'FR': 1 }, update: { ... }}}
    - Or update with partial stats (leave 'prefix_to_erase' as None):
      UpdateMetrics({'2013-02-01': {'itunes': { 'downloads': { 'DE': 3, 'FR': 1 }}}})
      resulting in: { itunes: { downloads: { 'US': 5, 'UK': 3, 'DE': 3, 'FR': 1 }, update: { ... }}}
  """
  if len(day_stats) == 0:
    callback()
    return

  cluster = metric.LOGS_STATS_NAME
  group_key = metric.Metric.EncodeGroupKey(cluster, metric.Metric.FindIntervalForCluster(cluster, 'daily'))

  # Convert each YYYY-MM-DD day into a UTC timestamp using 'hms_tuple' if given, otherwise noon.
  h, m, s = hms_tuple if hms_tuple is not None else (12, 0, 0)
  timestamps = [(util.ISO8601ToUTCTimestamp(day, hour=h, minute=m, second=s), day) for day in sorted(day_stats.keys())]

  # Query Metrics table for all metrics between the timestamps we have data for.
  existing_metrics = yield gen.Task(metric.Metric.QueryTimespan, db_client, group_key,
                                    timestamps[0][0], timestamps[-1][0])
  existing_dict = dict((m.timestamp, m) for m in existing_metrics)

  tasks = []
  for t, day in timestamps:
    data = day_stats[day]
    prev_metric = existing_dict.get(t, None)

    payload = json.dumps(data)
    if prev_metric is None:
      logging.info('%s: new metric: %r' % (day, payload))
    else:
      prev_payload = prev_metric.payload
      # We do this twice; it's simpler than making deepcopy work on DotDict.
      prev_data = DotDict(json.loads(prev_payload))
      new_data = DotDict(json.loads(prev_payload))
      if prefix_to_erase is not None:
        # We can't call 'del' on a DotDict's internals, so simply replace the subtree with an empty DotDict; we'll be repopulating it.
        new_data[prefix_to_erase] = DotDict()

      # DotDict doesn't have an update() method.
      flat = new_data.flatten()
      flat.update(data.flatten())
      new_data = DotDict(flat)

      payload = json.dumps(new_data)
      if new_data.flatten() == prev_data.flatten():
        logging.info('%s: metric has not changed, skipping' % day)
        continue
      else:
        logging.info('%s: changed metric: %s -> %s' % (day, prev_payload, payload))

    if not dry_run:
      new_metric = metric.Metric.Create(group_key, 'logs_daily', t, payload)
      tasks.append(gen.Task(new_metric.Update, db_client))

  yield tasks
  callback()
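To make the merge semantics from the docstring concrete, here is a small stand-alone sketch. The flatten() helper is a hypothetical stand-in for DotDict.flatten(), which the code above suggests maps a nested dict to dotted-path keys.

def flatten(d, prefix=''):
  """Hypothetical stand-in for DotDict.flatten(): {'a': {'b': 1}} -> {'a.b': 1}."""
  flat = {}
  for key, value in d.items():
    path = '%s.%s' % (prefix, key) if prefix else key
    if isinstance(value, dict):
      flat.update(flatten(value, path))
    else:
      flat[path] = value
  return flat

existing = {'itunes': {'downloads': {'US': 5, 'UK': 3}}}
incoming = {'itunes': {'downloads': {'DE': 3, 'FR': 1}}}

# Without prefix_to_erase: flatten both and update, so leaves not mentioned in the
# incoming stats survive.
merged = flatten(existing)
merged.update(flatten(incoming))
# merged == {'itunes.downloads.US': 5, 'itunes.downloads.UK': 3,
#            'itunes.downloads.DE': 3, 'itunes.downloads.FR': 1}

# With prefix_to_erase='itunes.downloads', the existing subtree is emptied before the
# update, so only the incoming DE/FR counts would remain.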
Example #9
def RunOnce(client, job, callback):
    """Get list of files and call processing function."""
    logs_paths = logs_util.ServerLogsPaths('viewfinder', 'full')
    merged_store = ObjectStore.GetInstance(logs_paths.MERGED_LOGS_BUCKET)

    start_date = options.options.start_date
    if options.options.smart_scan:
        # Search for successful full-scan run in the last week.
        last_run = yield gen.Task(job.FindLastSuccess,
                                  with_payload_key='stats.last_day')
        if last_run is None:
            logging.info(
                'No previous successful scan found, rerun with --start_date')
            callback(None)
            return

        last_run_start = last_run['start_time']
        if util.HoursSince(
                last_run_start) < options.options.hours_between_runs:
            logging.info(
                'Last successful run started at %s, less than %d hours ago; skipping.'
                % (time.asctime(time.localtime(last_run_start)),
                   options.options.hours_between_runs))
            callback(None)
            return

        # Compute stats for all days since the last run (inclusive).
        start_date = last_run['stats.last_day']
        logging.info(
            'Last successful server_log_metrics run (%s) scanned up to %s' %
            (time.asctime(time.localtime(last_run_start)), start_date))

    assert start_date is not None, 'No start date provided. Use --start_date=YYYY-MM-DD or --smart_scan=True'

    last_day = None
    if options.options.compute_user_requests:
        # We look for files going back 32 days (30 because we need 30-day active users, and +2 for extra safety).
        user_start_time = util.ISO8601ToUTCTimestamp(
            start_date, hour=12) - 32 * constants.SECONDS_PER_DAY
        user_start_date = util.TimestampUTCToISO8601(user_start_time)
        # Fetch list of per-day user request stats.
        files = yield gen.Task(GetFileList, merged_store, 'user_requests',
                               user_start_date)
        user_request_stats = yield gen.Task(ComputeUserRequests, merged_store,
                                            files)

        # Write per-day stats to dynamodb.
        if len(user_request_stats) > 0:
            # We do not replace the 'active_users' category in previous metrics as we may not be recomputing all data
            # (e.g. we need to analyze 30 days of logs to get the full metric info).
            hms = logs_util.kDailyMetricsTimeByLogType['active_users']
            yield gen.Task(logs_util.UpdateMetrics,
                           client,
                           user_request_stats,
                           dry_run=options.options.dry_run,
                           hms_tuple=hms)
            last_day = sorted(user_request_stats.keys())[-1]

    if options.options.compute_registration_delay:
        # We compute stats for days within the registration window (plus an extra two for safety).
        device_start_time = util.ISO8601ToUTCTimestamp(start_date, hour=12) - \
                            (kDeviceRegistrationWindowDays + 2) * constants.SECONDS_PER_DAY
        device_start_date = util.TimestampUTCToISO8601(device_start_time)
        # However, we search for files for an extra 15 days. This is so that unregistered devices that ping every day
        # are not counted as starting on the first day of stats.
        device_search_time = device_start_time - 15 * constants.SECONDS_PER_DAY
        device_search_date = util.TimestampUTCToISO8601(device_search_time)
        # Fetch list of merged logs.
        files = yield gen.Task(GetFileList, merged_store, 'device_details',
                               device_search_date)
        device_stats = yield gen.Task(ComputeRegistrationDelay, merged_store,
                                      files, device_start_date)

        # Write per-day stats to dynamodb.
        if len(device_stats) > 0:
            # Replace the entire 'device_installs.registration' category
            hms = logs_util.kDailyMetricsTimeByLogType['device_installs']
            yield gen.Task(logs_util.UpdateMetrics,
                           client,
                           device_stats,
                           dry_run=options.options.dry_run,
                           hms_tuple=hms,
                           prefix_to_erase='device_installs.registration')

            last_day_device = sorted(device_stats.keys())[-1]
            if last_day is None or last_day > last_day_device:
                # We consider the last successful processing day to be the earlier of the two.
                last_day = last_day_device

    if options.options.compute_app_versions:
        # Look at an extra two days for safety.
        version_start_time = util.ISO8601ToUTCTimestamp(
            start_date, hour=12) - 2 * constants.SECONDS_PER_DAY
        version_start_date = util.TimestampUTCToISO8601(version_start_time)
        # Fetch list of merged logs.
        files = yield gen.Task(GetFileList, merged_store, 'device_details',
                               version_start_date)
        version_stats = yield gen.Task(ComputeAppVersions, merged_store, files)

        # Write per-day stats to dynamodb.
        if len(version_stats) > 0:
            # Replace the entire 'device_count' category
            hms = logs_util.kDailyMetricsTimeByLogType['device_count']
            yield gen.Task(logs_util.UpdateMetrics,
                           client,
                           version_stats,
                           dry_run=options.options.dry_run,
                           hms_tuple=hms,
                           prefix_to_erase='device_count')

            last_day_device = sorted(version_stats.keys())[-1]
            if last_day is None or last_day > last_day_device:
                # We consider the last successful processing day to be the earlier of the two.
                last_day = last_day_device

    if options.options.compute_traces or options.options.compute_aborts:
        # Fetch extra days (per --extra_trace_days) to filter out previously-seen traces.
        trace_start_time = (
            util.ISO8601ToUTCTimestamp(start_date, hour=12) -
            options.options.extra_trace_days * constants.SECONDS_PER_DAY)
        trace_start_date = util.TimestampUTCToISO8601(trace_start_time)
        # Fetch list of merged logs.
        files = yield gen.Task(GetFileList, merged_store, 'traces',
                               trace_start_date)
        # Run them separately, we'll be sending separate email reports.
        if options.options.compute_traces:
            yield gen.Task(ComputeTraces, merged_store, files, start_date)
        if options.options.compute_aborts:
            yield gen.Task(ComputeAborts, merged_store, files, start_date)

    callback(last_day)