Example #1
def RunOnce(callback):
  """Get list of files and call processing function."""
  dry_run = options.options.dry_run

  logs_paths = logs_util.ServerLogsPaths('viewfinder', 'full')

  if dry_run:
    logging.warning('dry_run=True: will not upload processed logs files or update registry')

  logs_store = ObjectStore.GetInstance(logs_paths.SOURCE_LOGS_BUCKET)
  merged_store = ObjectStore.GetInstance(logs_paths.MERGED_LOGS_BUCKET)

  # Fetch list of raw logs files.
  files = yield gen.Task(GetRawLogsFileList, logs_store, logs_paths, options.options.start_date)

  # Fetch list of processed logs files from registry.
  processed_files = yield gen.Task(logs_util.GetRegistry, merged_store, logs_paths.ProcessedRegistryPath())
  if processed_files is None:
    # None means: registry does not exist. All other errors throw exceptions.
    processed_files = []

  # Compute list of raw files to process (and sort by filename -> sort by date).
  files_set = set(files)
  processed_set = set(processed_files)
  missing_files = list(files_set.difference(processed_set))
  missing_files.sort()

  to_process = missing_files
  if options.options.max_files_to_process is not None:
    to_process = missing_files[0:options.options.max_files_to_process]

  logging.info('found %d raw files and %d processed files, %d missing. Will process %d.' %
               (len(files), len(processed_files), len(missing_files), len(to_process)))
  if len(missing_files) == 0:
    logging.info('No raw logs files to process.')
    callback()
    return

  merged_files = yield gen.Task(ProcessFiles, logs_store, merged_store, logs_paths, to_process, dry_run)
  logging.info('found %d raw files and %d processed files, %d missing, successfully processed %d' %
               (len(files), len(processed_files), len(missing_files), len(merged_files)))

  # Add processed files to registry and write to S3.
  # TODO(marc): any failure in merged log upload or registry upload will cause us to get out of sync. To fix this,
  # we should also have a list of properly applied processed logs.
  processed_files.extend(merged_files)
  processed_files.sort()
  if not dry_run:
    yield gen.Task(retry.CallWithRetryAsync, kS3UploadRetryPolicy,
                   logs_util.WriteRegistry, merged_store, logs_paths.ProcessedRegistryPath(), processed_files)

  callback()
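
The heart of RunOnce is the registry diff: any file in the raw listing that is absent from the processed registry still needs work. A standalone sketch of that pattern (the file names and cap are made up for illustration):

# Sketch of the registry-diff pattern above; file names are hypothetical.
files = ['2013-01-01/log.a', '2013-01-02/log.b', '2013-01-03/log.c']
processed_files = ['2013-01-01/log.a']

# Set difference yields the unprocessed files; sorting by name sorts by date
# because each path starts with an ISO-8601 day.
missing_files = sorted(set(files) - set(processed_files))

# Optionally cap the batch size, as --max_files_to_process does above.
max_files = 1
to_process = missing_files[:max_files] if max_files is not None else missing_files
assert to_process == ['2013-01-02/log.b']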
Example #2
def RunOnce(client, job, callback):
  """Get list of files and call processing function."""
  logs_paths = logs_util.ServerLogsPaths('viewfinder', 'full')
  merged_store = ObjectStore.GetInstance(logs_paths.MERGED_LOGS_BUCKET)

  start_date = options.options.start_date
  if options.options.smart_scan:
    # Search for successful full-scan run in the last week.
    last_run = yield gen.Task(job.FindLastSuccess, with_payload_key='stats.last_day')
    if last_run is None:
      logging.info('No previous successful scan found, rerun with --start_date')
      callback(None)
      return

    last_run_start = last_run['start_time']
    if util.HoursSince(last_run_start) < options.options.hours_between_runs:
      logging.info('Last successful run started at %s, less than %d hours ago; skipping.' %
                   (time.asctime(time.localtime(last_run_start)), options.options.hours_between_runs))
      callback(None)
      return

    last_day = last_run['stats.last_day']
    # Set scan_start to the last successfully processed day minus 1d. The extra day is in case
    # some logs were pushed to S3 late. This effectively reprocesses two days: the last day that
    # was successfully processed and the one before it.
    start_time = util.ISO8601ToUTCTimestamp(last_day, hour=12) - constants.SECONDS_PER_DAY
    start_date = util.TimestampUTCToISO8601(start_time)
    logging.info('Last successful analyze_logs run (%s) scanned up to %s, setting analysis start date to %s' %
                 (time.asctime(time.localtime(last_run_start)), last_day, start_date))

  # Fetch list of merged logs.
  files = yield gen.Task(GetMergedLogsFileList, merged_store, logs_paths, start_date)
  last_day = yield gen.Task(ProcessFiles, merged_store, logs_paths, files)
  callback(last_day)
  return
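
The smart-scan rewind only needs the standard library. Assuming util.ISO8601ToUTCTimestamp and util.TimestampUTCToISO8601 convert between 'YYYY-MM-DD' strings and UTC epoch seconds (their exact signatures are an assumption), the rewind-by-one-day step is equivalent to:

import calendar
import time

SECONDS_PER_DAY = 24 * 60 * 60

# Hypothetical stand-ins for the util helpers used above.
def iso8601_to_utc_timestamp(day, hour=0):
  return calendar.timegm(time.strptime(day, '%Y-%m-%d')) + hour * 3600

def timestamp_utc_to_iso8601(ts):
  return time.strftime('%Y-%m-%d', time.gmtime(ts))

last_day = '2013-04-02'
start_date = timestamp_utc_to_iso8601(
    iso8601_to_utc_timestamp(last_day, hour=12) - SECONDS_PER_DAY)
assert start_date == '2013-04-01'  # covers the last day and the one before it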
Example #3
def _UploadWelcomePhotos(http_client, client, user, upload_request):
    """Uploads a set of photos that will be used in the new user welcome conversation. These
  photos are uploaded to the given user account. "upload_request" is in the UPLOAD_EPISODE_REQUEST
  format in json_schema.py, except:

    1. Activity, episode, and photo ids are added by this method.
    2. Each photo dict must contain an additional "name" field which gives the start of the
       filename of a jpg file in the backend/resources/welcome directory. Three files must
       exist there, in this format: <name>_full.jpg, <name>_med.jpg, <name>_tn.jpg.
  """
    obj_store = ObjectStore.GetInstance(ObjectStore.PHOTO)
    welcome_path = os.path.join(ResourcesManager.Instance().resources_path,
                                'welcome')

    # Set the ids of all activities, episodes, and photos in the welcome conversation.
    yield _SetWelcomeIds(user, upload_request)

    # Make a copy with the "name" fields stripped out; UploadEpisode chokes on them.
    upload_request_copy = deepcopy(upload_request)
    for ph_dict in upload_request_copy['photos']:
        ph_dict.pop('name')

    # Directly call the service API in order to upload the photos.
    upload_response = yield UploadEpisode(client, obj_store, user.user_id,
                                          user.webapp_dev_id,
                                          upload_request_copy)

    # Upload photo to blob store (in various formats).
    for request_ph_dict, response_ph_dict in zip(upload_request['photos'],
                                                 upload_response['photos']):
        for format in ('full', 'med', 'tn'):
            # Read the photo bytes from disk (binary mode, since this is JPEG data).
            with open(os.path.join(welcome_path,
                                   '%s_%s.jpg' % (request_ph_dict['name'], format)),
                      'rb') as f:
                image_data = f.read()

            photo_url = response_ph_dict[format + '_put_url']
            content_md5 = base64.b64encode(
                request_ph_dict[format + '_md5'].decode('hex'))
            headers = {
                'Content-Type': 'image/jpeg',
                'Content-MD5': content_md5
            }

            validate_cert = not options.options.fileobjstore
            response = yield gen.Task(http_client.fetch,
                                      photo_url,
                                      method='PUT',
                                      body=image_data,
                                      follow_redirects=False,
                                      validate_cert=validate_cert,
                                      headers=headers)
            if response.code != 200:
                raise Exception(
                    'Cannot upload photo "%s". HTTP error code %d. Is server running and accessible?'
                    % (request_ph_dict['photo_id'], response.code))
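
The Content-MD5 header expected by S3-style PUTs is the base64 encoding of the raw 16-byte digest, while the request dict above carries hex digests; binascii.unhexlify is the portable spelling of the Python 2-only str.decode('hex'). A minimal sketch:

import base64
import binascii
import hashlib

image_data = b'...jpeg bytes...'
hex_md5 = hashlib.md5(image_data).hexdigest()  # hex digest, as stored in e.g. 'full_md5'

# Content-MD5 wants base64 of the raw digest, not of the hex string.
content_md5 = base64.b64encode(binascii.unhexlify(hex_md5))
assert content_md5 == base64.b64encode(hashlib.md5(image_data).digest())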
Example #4
def __init__(self, client, user_id, email):
    super(BuildArchiveOperation, self).__init__(client)
    self._user_id = user_id
    self._email = email
    self._notify_timestamp = self._op.timestamp
    self._photo_obj_store = ObjectStore.GetInstance(ObjectStore.PHOTO)
    self._user_zips_obj_store = ObjectStore.GetInstance(ObjectStore.USER_ZIPS)
    self._offboarding_assets_dir_path = ResourcesManager.Instance().GetOffboardingPath()
    self._temp_dir_path = os.path.join(ServerEnvironment.GetViewfinderTempDirPath(),
                                       BuildArchiveOperation._OFFBOARDING_DIR_NAME)
    self._zip_file_path = os.path.join(self._temp_dir_path,
                                       BuildArchiveOperation._ZIP_FILE_NAME)
    self._content_dir_path = os.path.join(self._temp_dir_path,
                                          BuildArchiveOperation._CONTENT_DIR_NAME)
    self._data_dir_path = os.path.join(self._content_dir_path, CONVO_FOLDER_NAME)
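
The constructor only wires up paths. With hypothetical values for the class constants (their real values are not shown in this snippet), the resulting layout is:

import os

# Hypothetical constant values, for illustration only.
_OFFBOARDING_DIR_NAME = 'offboarding'
_ZIP_FILE_NAME = 'archive.zip'
_CONTENT_DIR_NAME = 'content'
temp_root = '/tmp/viewfinder'  # stands in for ServerEnvironment.GetViewfinderTempDirPath()

temp_dir_path = os.path.join(temp_root, _OFFBOARDING_DIR_NAME)     # /tmp/viewfinder/offboarding
zip_file_path = os.path.join(temp_dir_path, _ZIP_FILE_NAME)        # .../offboarding/archive.zip
content_dir_path = os.path.join(temp_dir_path, _CONTENT_DIR_NAME)  # .../offboarding/content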
Example #5
def RunOnce(client, callback):
    object_store = ObjectStore.GetInstance(ObjectStore.SERVER_DATA)
    filenames = {}

    for num_days in options.options.analysis_intervals_days:
        filename = yield gen.Task(ProcessOneInterval, client, num_days)
        filenames[num_days] = filename

    yield gen.Task(UploadFiles, object_store, filenames.values())
    yield gen.Task(SendReport, object_store, filenames)
    callback()
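
The intervals run strictly one after another. Old-style tornado gen.engine coroutines can also yield a list of tasks to run them in parallel, so an alternative sketch (same names as above; parallelism costs more open connections, which Example #9 warns about) would be:

# Hypothetical parallel variant: yielding a list of gen.Task objects runs them
# concurrently and returns their results in order.
days = options.options.analysis_intervals_days
results = yield [gen.Task(ProcessOneInterval, client, num_days) for num_days in days]
filenames = dict(zip(days, results))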
Example #6
    def _CreateFormats():
        """Used to set up initial photos."""
        obj_store = ObjectStore.GetInstance(ObjectStore.PHOTO)
        client = DBClient.Instance()
        http_client = AsyncHTTPClient()

        for photo_id, name in [('pgAZn77bJ-Kc', 'beach_c4'),
                               ('pgAzpz7bJ-Mc', 'beach_a1'),
                               ('pgB-Fh7bJ-Mg', 'beach_a2'),
                               ('pgAzo67bJ-MV', 'beach_a3'),
                               ('pgB-pj7bJ-Mo', 'beach_a4'),
                               ('pgAvIa7bJ-MN', 'beach_b1'),
                               ('pgAuoQ7bJ-MF', 'beach_b2'),
                               ('pgAtwd7bJ-M7', 'beach_b3'),
                               ('pgAaOJ7bJ-Kw', 'beach_c1'),
                               ('pgA_vm7bJ-Ko', 'beach_c2'),
                               ('pgAZna7bJ-Kk', 'beach_c3'),
                               ('pgAW0x7bJ-KV', 'beach_d1'),
                               ('pgAUMm7bJ-KN', 'beach_d2'),
                               ('pfYwYR7bJ-KJ', 'party_1'),
                               ('pfYwTk7bJ-KF', 'party_2'),
                               ('pfYwSo7bJ-K7', 'party_3'),
                               ('pfYw0g7bJ-K-', 'party_4'),
                               ('pfYvoK7bJ-Jw', 'party_5'),
                               ('pfYvhI7bJ-Jo', 'party_6'),
                               ('prHKwa7bJ-N30', 'gone_fishing_1'),
                               ('prBUtl7bJ-Mw', 'gone_fishing_2'),
                               ('pfSb0S7bJ-Jk', 'street_art_1'),
                               ('pfSasJ7bJ-Jc', 'street_art_2')]:

            photo = yield Photo.Query(client, photo_id, None)
            photo_dict = photo._asdict()
            photo_dict['name'] = name
            del photo_dict['photo_id']
            del photo_dict['user_id']
            del photo_dict['_version']
            del photo_dict['episode_id']
            print json.dumps(photo_dict, indent=True)

            for suffix, format in [('.f', 'full'), ('.m', 'med'),
                                   ('.t', 'tn')]:
                url = obj_store.GenerateUrl('%s%s' % (photo_id, suffix))
                response = yield http_client.fetch(url, method='GET')

                welcome_path = os.path.join(
                    ResourcesManager.Instance().resources_path, 'welcome')
                with open(os.path.join(welcome_path, '%s_%s.jpg' % (name, format)),
                          'wb') as f:
                    f.write(response.body)
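
The run of del statements above can be collapsed into one dict comprehension that drops the database-only keys before dumping; a sketch using the same names:

# Equivalent to the four `del` statements above.
DB_ONLY_KEYS = frozenset(['photo_id', 'user_id', '_version', 'episode_id'])
photo_dict = {k: v for k, v in photo._asdict().items() if k not in DB_ONLY_KEYS}
photo_dict['name'] = name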
Example #7
def _RunService(callback):
    """Invokes user account merge utility."""
    assert options.options.method, 'must specify a service method (--method)'
    assert options.options.user_id, 'must specify a user id (--user_id)'

    # Read request body from standard in.
    if sys.stdin.isatty():
        print 'Enter JSON-encoded service request:'
    request_body = sys.stdin.read()

    # If version was not specified, add it now (use max supported version).
    request_dict = json.loads(request_body)
    if 'headers' not in request_dict:
        request_dict['headers'] = dict()

    if 'version' not in request_dict['headers']:
        request_dict['headers']['version'] = MAX_SUPPORTED_MESSAGE_VERSION

    client = DBClient.Instance()
    obj_store = ObjectStore.GetInstance(ObjectStore.PHOTO)

    def _OnService(response_dict):
        logging.info('result: %s' %
                     util.ToCanonicalJSON(response_dict, indent=2))
        callback()

    def _OnQueryUser(user):
        context = base.ViewfinderContext(None)
        context.user = user
        context.device_id = user.webapp_dev_id if options.options.device_id is None else options.options.device_id

        with stack_context.StackContext(context):
            ServiceHandler.InvokeService(client,
                                         obj_store,
                                         options.options.method,
                                         context.user.user_id,
                                         context.device_id,
                                         request_dict,
                                         callback=_OnService)

    User.Query(client, options.options.user_id, None, _OnQueryUser)
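
The header-defaulting step can also be written with dict.setdefault, which inserts a value only when the key is missing; a one-line sketch over the same request_dict:

# Equivalent to the two membership checks above.
request_dict.setdefault('headers', {}).setdefault('version', MAX_SUPPORTED_MESSAGE_VERSION)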
Example #8
def RunOnce(client, job, callback):
  """Get list of files and call processing function."""
  merged_store = ObjectStore.GetInstance(logs_util.UserAnalyticsLogsPaths.MERGED_LOGS_BUCKET)

  start_date = options.options.start_date
  if options.options.smart_scan:
    # Search for successful full-scan run in the last week.
    last_run = yield gen.Task(job.FindLastSuccess, with_payload_key='stats.last_day')
    if last_run is None:
      logging.info('No previous successful scan found, rerun with --start_date')
      callback(None)
      return

    last_run_start = last_run['start_time']
    if util.HoursSince(last_run_start) < options.options.hours_between_runs:
      logging.info('Last successful run started at %s, less than %d hours ago; skipping.' %
                   (time.asctime(time.localtime(last_run_start)), options.options.hours_between_runs))
      callback(None)
      return

    last_day = last_run['stats.last_day']
    # Set scan_start to the last processed day minus 7d, in case some logs were pushed to S3
    # late or the previous run missed some days.
    start_time = util.ISO8601ToUTCTimestamp(last_day, hour=12) - constants.SECONDS_PER_WEEK
    start_date = util.TimestampUTCToISO8601(start_time)
    logging.info('Last successful analyze_analytics run (%s) scanned up to %s, setting analysis start date to %s' %
                 (time.asctime(time.localtime(last_run_start)), last_day, start_date))

  # Fetch list of merged logs.
  files = yield gen.Task(GetMergedLogsFileList, merged_store, start_date)
  day_stats = yield gen.Task(ProcessFiles, merged_store, files)

  # Write per-day stats to dynamodb.
  if len(day_stats) > 0:
    hms = logs_util.kDailyMetricsTimeByLogType['analytics_logs']
    yield gen.Task(logs_util.UpdateMetrics, client, day_stats, dry_run=options.options.dry_run, hms_tuple=hms)
    last_day = sorted(day_stats.keys())[-1]
    callback(last_day)
  else:
    callback(None)
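
Picking the newest day with sorted(...)[-1] works because ISO-8601 day strings sort lexicographically in chronological order; max() does the same without building a sorted list:

# Hypothetical per-day stats keyed by ISO-8601 day strings.
day_stats = {'2013-03-31': {'active_users': 10},
             '2013-04-01': {'active_users': 12}}
assert sorted(day_stats.keys())[-1] == max(day_stats) == '2013-04-01'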
Example #9
def RunOnce(callback):
  """Get list of files and call processing function."""
  dry_run = options.options.dry_run
  client_store = ObjectStore.GetInstance(logs_util.UserAnalyticsLogsPaths.SOURCE_LOGS_BUCKET)

  if dry_run:
    logging.warning('dry_run=True: will not upload processed logs files or update registry')

  if options.options.user:
    users = [options.options.user]
  else:
    users = yield gen.Task(logs_util.ListClientLogUsers, client_store)

  # Process users one at a time: running all users in parallel can get us to exceed the
  # open FD limit.
  examined = 0
  for u in users:
    if options.options.start_user is not None and u < options.options.start_user:
      continue
    if options.options.max_users is not None and examined >= options.options.max_users:
      break
    examined += 1
    yield gen.Task(HandleOneUser, client_store, u)

  callback()
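
The continue/break bookkeeping in the loop is equivalent to filtering and slicing the user list up front; a sketch with hypothetical values standing in for --start_user and --max_users:

users = ['u1', 'u2', 'u3', 'u4']
start_user = 'u2'
max_users = 2

eligible = [u for u in users if start_user is None or u >= start_user]
to_process = eligible[:max_users] if max_users is not None else eligible
assert to_process == ['u2', 'u3']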