def process_avatars(avatar_list: List[ZerverFieldsT], avatar_dir: str,
                    realm_id: int) -> List[ZerverFieldsT]:
    """
    Download every avatar in avatar_list and record where it was saved.

    For each record, the remote URL found in record['path'] is handed to
    get_avatar together with two target paths inside avatar_dir: one ending
    in '.png' and one ending in '.original'.  (The original note on this
    function says the 512 px avatar is fetched; the size is decided inside
    get_avatar, which is not visible here.)

    The records in avatar_list are modified in place: 'path' and 's3_path'
    become the '.png' path and 'size' becomes the size of that file on disk.
    A copy of each record pointing at the '.original' file (same 'size') is
    created as well.

    Returns avatar_list followed by the '.original' copies.
    """
    logging.info('######### GETTING AVATARS #########\n')

    original_records = []
    for record in avatar_list:
        avatar_hash = user_avatar_path_from_ids(record['user_profile_id'], realm_id)
        remote_url = record['path']
        # Copy the record before its fields are overwritten below.
        original_record = dict(record)

        path_stem = '%s/%s' % (avatar_dir, avatar_hash)
        png_path = path_stem + '.png'
        original_path = path_stem + '.original'

        # Fetch the avatars from the url
        get_avatar(remote_url, png_path, original_path)
        png_size = os.stat(png_path).st_size

        record['path'] = png_path
        record['s3_path'] = png_path
        record['size'] = png_size

        original_record['path'] = original_path
        original_record['s3_path'] = original_path
        original_record['size'] = png_size
        original_records.append(original_record)

    logging.info('######### GETTING AVATARS FINISHED #########\n')
    return avatar_list + original_records
def _get_unversioned_avatar_url(user_profile_id, avatar_source, realm_id, email=None, medium=False):
    # type: (int, Text, int, Optional[Text], bool) -> Text
    """
    Return the avatar URL for a user.

    When avatar_source is u'U', the URL comes from upload_backend, keyed by
    the path derived from the user and realm IDs.  For any other source the
    gravatar URL for `email` is returned, and `email` must not be None.
    """
    if avatar_source != u'U':
        # Every non-'U' source is resolved through the gravatar helper.
        assert email is not None
        return _get_unversioned_gravatar_url(email, medium)

    hash_key = user_avatar_path_from_ids(user_profile_id, realm_id)
    return upload_backend.get_avatar_url(hash_key, medium=medium)
def write_avatar_png(avatar_folder: str,
                     realm_id: int,
                     user_id: int,
                     bits: bytes) -> ZerverFieldsT:
    '''
    Write already-available avatar bytes to disk and describe the result.

    Use this function for conversions like Hipchat where the bits for
    the .png file come in something like a users.json file, and where
    we don't have to fetch avatar images externally.

    The bytes are written to <avatar_folder>/<avatar hash>.original and
    the returned dict is the metadata record for that file.
    '''
    avatar_hash = user_avatar_path_from_ids(
        user_profile_id=user_id,
        realm_id=realm_id,
    )
    image_path = os.path.join(avatar_folder, avatar_hash + '.original')

    with open(image_path, 'wb') as image_file:
        image_file.write(bits)

    # Return metadata that eventually goes in records.json.
    return {
        'path': image_path,
        's3_path': image_path,
        'realm_id': realm_id,
        'user_profile_id': user_id,
        # We only write the .original file; ask the importer to do the thumbnailing.
        'importer_should_thumbnail': True,
    }
def process_avatars(avatar_list: List[ZerverFieldsT], avatar_dir: str,
                    realm_id: int, threads: int,
                    size_url_suffix: str = '') -> List[ZerverFieldsT]:
    """
    Download the avatars of the given users and save each one in
    avatar_dir with both the '.png' and '.original' extensions.

    Required parameters:

    1. avatar_list: List of avatars to be mapped in avatars records.json file
    2. avatar_dir: Folder where the downloaded avatars are saved
    3. realm_id: Realm ID.

    threads is forwarded to run_parallel; size_url_suffix is appended to
    every avatar URL before it is requested.

    The records in avatar_list are rewritten in place to point at the
    '.png' files; the return value is avatar_list followed by one copy
    per record pointing at the '.original' file.
    """

    def get_avatar(avatar_upload_list: List[str]) -> int:
        # The argument is one [url, png path, original path] job.
        avatar_url = avatar_upload_list[0]
        png_target = avatar_upload_list[1]
        original_target = avatar_upload_list[2]

        response = requests.get(avatar_url + size_url_suffix, stream=True)
        with open(png_target, 'wb') as image_file:
            shutil.copyfileobj(response.raw, image_file)
        # The '.original' file is a plain copy of the downloaded image.
        shutil.copy(png_target, original_target)
        return 0

    logging.info('######### GETTING AVATARS #########\n')
    logging.info('DOWNLOADING AVATARS .......\n')

    download_jobs = []
    original_records = []
    for record in avatar_list:
        avatar_hash = user_avatar_path_from_ids(record['user_profile_id'], realm_id)
        remote_url = record['path']
        original_record = dict(record)

        path_stem = '%s/%s' % (avatar_dir, avatar_hash)
        png_path = path_stem + '.png'
        original_path = path_stem + '.original'
        download_jobs.append([remote_url, png_path, original_path])

        # We don't add the size field here in avatar's records.json,
        # since the metadata is not needed on the import end, and we
        # don't have it until we've downloaded the files anyway.
        record['path'] = png_path
        record['s3_path'] = png_path

        original_record['path'] = original_path
        original_record['s3_path'] = original_path
        original_records.append(original_record)

    # Run downloads in parallel; iterating the result drives the jobs.
    output = [job for (status, job) in
              run_parallel(get_avatar, download_jobs, threads=threads)]

    logging.info('######### GETTING AVATARS FINISHED #########\n')
    return avatar_list + original_records
def process_avatars(avatar_list: List[ZerverFieldsT], avatar_dir: str,
                    realm_id: int, threads: int,
                    size_url_suffix: str='') -> List[ZerverFieldsT]:
    """
    This function gets the avatar of the user and saves it in the
    user's avatar directory with both the extensions '.png' and
    '.original'

    Required parameters:

    1. avatar_list: List of avatars to be mapped in avatars records.json file
    2. avatar_dir: Folder where the downloaded avatars are saved
    3. realm_id: Realm ID.

    threads is forwarded to run_parallel; size_url_suffix is appended to
    every avatar URL before it is requested.

    The records in avatar_list are rewritten in place so that 'path' and
    's3_path' hold the '.png' file name relative to avatar_dir.  The return
    value is avatar_list followed by one copy per record pointing at the
    '.original' file.

    We use this for Slack and Gitter conversions, where avatars need to be
    downloaded.  For simpler conversions see write_avatar_png.
    """

    def get_avatar(avatar_upload_item: List[str]) -> int:
        # avatar_upload_item is one [url, png name, original name] job.
        # The file names are relative to avatar_dir, so they are joined
        # onto it here.  These must be read from the job itself, not from
        # the enclosing avatar_original_list, which holds record dicts.
        avatar_url = avatar_upload_item[0]
        image_path = os.path.join(avatar_dir, avatar_upload_item[1])
        original_image_path = os.path.join(avatar_dir, avatar_upload_item[2])

        response = requests.get(avatar_url + size_url_suffix, stream=True)
        with open(image_path, 'wb') as image_file:
            shutil.copyfileobj(response.raw, image_file)
        shutil.copy(image_path, original_image_path)
        return 0

    logging.info('######### GETTING AVATARS #########\n')
    logging.info('DOWNLOADING AVATARS .......\n')

    avatar_original_list = []
    avatar_upload_list = []
    for avatar in avatar_list:
        avatar_hash = user_avatar_path_from_ids(avatar['user_profile_id'], realm_id)
        avatar_url = avatar['path']
        avatar_original = dict(avatar)

        image_path = ('%s.png' % (avatar_hash))
        original_image_path = ('%s.original' % (avatar_hash))

        avatar_upload_list.append([avatar_url, image_path, original_image_path])
        # We don't add the size field here in avatar's records.json,
        # since the metadata is not needed on the import end, and we
        # don't have it until we've downloaded the files anyway.
        avatar['path'] = image_path
        avatar['s3_path'] = image_path

        avatar_original['path'] = original_image_path
        avatar_original['s3_path'] = original_image_path
        avatar_original_list.append(avatar_original)

    # Run downloads in parallel.  The results are not needed, but the
    # iterator has to be consumed for the jobs to be driven to completion.
    for (_status, _job) in run_parallel(get_avatar, avatar_upload_list, threads=threads):
        pass

    logging.info('######### GETTING AVATARS FINISHED #########\n')
    return avatar_list + avatar_original_list
def _get_unversioned_avatar_url(user_profile_id: int,
                                avatar_source: str,
                                realm_id: int,
                                email: Optional[str]=None,
                                medium: bool=False) -> str:
    """
    Return the avatar URL for a user.

    An avatar_source of 'U' is resolved through upload_backend, using the
    path derived from the user and realm IDs.  Anything else falls back
    to the gravatar URL for `email`, which must then be provided.
    """
    uses_uploaded_avatar = avatar_source == 'U'
    if not uses_uploaded_avatar:
        assert email is not None
        return _get_unversioned_gravatar_url(email, medium)

    hash_key = user_avatar_path_from_ids(user_profile_id, realm_id)
    return upload_backend.get_avatar_url(hash_key, medium=medium)
def _get_unversioned_avatar_url(user_profile_id: int,
                                avatar_source: str,
                                realm_id: int,
                                email: Optional[str] = None,
                                medium: bool = False) -> str:
    """
    Build the avatar URL for a user.

    For avatar_source 'U' the URL is requested from upload_backend with
    the hash key computed from user_profile_id and realm_id.  For every
    other source the gravatar URL for `email` is returned; `email` is
    asserted to be non-None in that case.
    """
    if avatar_source != 'U':
        assert email is not None
        gravatar_url = _get_unversioned_gravatar_url(email, medium)
        return gravatar_url

    uploaded_url = upload_backend.get_avatar_url(
        user_avatar_path_from_ids(user_profile_id, realm_id),
        medium=medium,
    )
    return uploaded_url
def process_avatars(
    avatar_list: List[ZerverFieldsT],
    avatar_dir: str,
    realm_id: int,
    threads: int,
    size_url_suffix: str = "",
) -> List[ZerverFieldsT]:
    """
    Download the avatars of the given users and save each one in the
    avatar directory with both the '.png' and '.original' extensions.

    Required parameters:

    1. avatar_list: List of avatars to be mapped in avatars records.json file
    2. avatar_dir: Folder where the downloaded avatars are saved
    3. realm_id: Realm ID.

    avatar_dir and size_url_suffix are bound onto get_avatar, and threads
    is forwarded to run_parallel_wrapper.

    The records in avatar_list are rewritten in place so that "path" and
    "s3_path" hold the '.png' file name; the return value is avatar_list
    followed by one copy per record pointing at the '.original' file.

    We use this for Slack and Gitter conversions, where avatars need to be
    downloaded.  For simpler conversions see write_avatar_png.
    """
    logging.info("######### GETTING AVATARS #########\n")
    logging.info("DOWNLOADING AVATARS .......\n")

    download_jobs = []
    original_records = []
    for record in avatar_list:
        avatar_hash = user_avatar_path_from_ids(record["user_profile_id"], realm_id)
        remote_url = record["path"]
        original_record = dict(record)

        png_name = "{}.png".format(avatar_hash)
        original_name = "{}.original".format(avatar_hash)
        download_jobs.append([remote_url, png_name, original_name])

        # We don't add the size field here in avatar's records.json,
        # since the metadata is not needed on the import end, and we
        # don't have it until we've downloaded the files anyway.
        record["path"] = png_name
        record["s3_path"] = png_name

        original_record["path"] = original_name
        original_record["s3_path"] = original_name
        original_records.append(original_record)

    # Run downloads in parallel
    downloader = partial(get_avatar, avatar_dir, size_url_suffix)
    run_parallel_wrapper(downloader, download_jobs, threads=threads)

    logging.info("######### GETTING AVATARS FINISHED #########\n")
    return avatar_list + original_records
def process_avatars(avatar_list: List[ZerverFieldsT], avatar_dir: str,
                    realm_id: int, threads: int) -> List[ZerverFieldsT]:
    """
    Download the 512 px avatar of every user and save it in avatar_dir
    with both the '.png' and '.original' extensions.

    The records in avatar_list are rewritten in place to point at the
    '.png' files; the return value is avatar_list followed by one copy
    per record pointing at the '.original' file.  threads is forwarded to
    run_parallel.
    """

    def get_avatar(avatar_upload_list: List[str]) -> int:
        # The argument is one [url, png path, original path] job.
        slack_avatar_url = avatar_upload_list[0]
        png_target = avatar_upload_list[1]
        original_target = avatar_upload_list[2]

        # get avatar of size 512
        response = requests.get(slack_avatar_url + '-512', stream=True)
        with open(png_target, 'wb') as image_file:
            shutil.copyfileobj(response.raw, image_file)
        shutil.copy(png_target, original_target)
        return 0

    logging.info('######### GETTING AVATARS #########\n')
    logging.info('DOWNLOADING AVATARS .......\n')

    download_jobs = []
    original_records = []
    for record in avatar_list:
        avatar_hash = user_avatar_path_from_ids(record['user_profile_id'], realm_id)
        remote_url = record['path']
        original_record = dict(record)

        path_stem = '%s/%s' % (avatar_dir, avatar_hash)
        png_path = path_stem + '.png'
        original_path = path_stem + '.original'
        download_jobs.append([remote_url, png_path, original_path])

        record['path'] = png_path
        record['s3_path'] = png_path

        original_record['path'] = original_path
        original_record['s3_path'] = original_path
        original_records.append(original_record)

    # Run downloads in parallel; iterating the result drives the jobs.
    output = [job for (status, job) in
              run_parallel(get_avatar, download_jobs, threads=threads)]

    logging.info('######### GETTING AVATARS FINISHED #########\n')
    return avatar_list + original_records
def process_avatars(avatar_list: List[ZerverFieldsT], avatar_dir: str,
                    realm_id: int, threads: int) -> List[ZerverFieldsT]:
    """
    Fetch each user's avatar at size 512 px and store it in avatar_dir
    under both the '.png' and the '.original' extension.

    avatar_list is updated in place ('path' and 's3_path' become the
    '.png' path).  The result is avatar_list plus, for every record, a
    copy whose 'path' and 's3_path' are the '.original' path.  threads is
    forwarded to run_parallel.
    """

    def get_avatar(avatar_upload_list: List[str]) -> int:
        # Each job is [url, png path, original path].
        source_url = avatar_upload_list[0]
        png_file = avatar_upload_list[1]
        original_file = avatar_upload_list[2]

        # get avatar of size 512
        response = requests.get(source_url + '-512', stream=True)
        with open(png_file, 'wb') as image_file:
            shutil.copyfileobj(response.raw, image_file)
        shutil.copy(png_file, original_file)
        return 0

    logging.info('######### GETTING AVATARS #########\n')
    logging.info('DOWNLOADING AVATARS .......\n')

    jobs = []
    originals = []
    for entry in avatar_list:
        avatar_hash = user_avatar_path_from_ids(entry['user_profile_id'], realm_id)
        slack_avatar_url = entry['path']
        original_entry = dict(entry)

        png_path = '%s/%s.png' % (avatar_dir, avatar_hash)
        original_path = '%s/%s.original' % (avatar_dir, avatar_hash)
        jobs.append([slack_avatar_url, png_path, original_path])

        # We don't add the size field here in avatar's records.json,
        # since the metadata is not needed on the import end, and we
        # don't have it until we've downloaded the files anyway.
        entry.update(path=png_path, s3_path=png_path)
        original_entry.update(path=original_path, s3_path=original_path)
        originals.append(original_entry)

    # Run downloads in parallel; iterating the result drives the jobs.
    output = []
    for (status, job) in run_parallel(get_avatar, jobs, threads=threads):
        output.append(job)

    logging.info('######### GETTING AVATARS FINISHED #########\n')
    return avatar_list + originals
def export_avatars_from_local(realm: Realm, local_dir: Path, output_dir: Path) -> None:
    """
    Copy the locally stored avatar files of a realm's users into
    output_dir and write a records.json describing every copied file.

    The users considered are all UserProfile rows of the realm plus the
    notification, email gateway and welcome system bots.  Users whose
    avatar comes from gravatar are skipped.
    """
    users = list(UserProfile.objects.filter(realm=realm))
    users += [
        get_system_bot(settings.NOTIFICATION_BOT),
        get_system_bot(settings.EMAIL_GATEWAY_BOT),
        get_system_bot(settings.WELCOME_BOT),
    ]

    records = []
    count = 0
    for user in users:
        if user.avatar_source == UserProfile.AVATAR_FROM_GRAVATAR:
            continue

        # Match every file stored for this avatar, whatever its extension.
        avatar_path = user_avatar_path_from_ids(user.id, realm.id)
        pattern = os.path.join(local_dir, avatar_path + '.*')

        for local_path in glob.glob(pattern):
            logging.info('Copying avatar file for user %s from %s' % (user.email, local_path))
            relative_name = os.path.relpath(local_path, local_dir)
            output_path = os.path.join(output_dir, relative_name)
            os.makedirs(str(os.path.dirname(output_path)), exist_ok=True)
            subprocess.check_call(["cp", "-a", str(local_path), str(output_path)])

            file_stat = os.stat(local_path)
            records.append({
                'realm_id': realm.id,
                'user_profile_id': user.id,
                'user_profile_email': user.email,
                's3_path': relative_name,
                'path': relative_name,
                'size': file_stat.st_size,
                'last_modified': file_stat.st_mtime,
                'content_type': None,
            })

            count += 1
            if count % 100 == 0:
                logging.info("Finished %s" % (count, ))

    records_path = os.path.join(output_dir, "records.json")
    with open(records_path, "w") as records_file:
        ujson.dump(records, records_file, indent=4)
def _get_unversioned_avatar_url(avatar_source, email=None, realm_id=None,
                                user_profile_id=None, medium=False):
    # type: (Text, Text, Optional[int], Optional[int], bool) -> Text
    """
    Return the avatar URL for a user.

    * avatar_source u'U': URL from upload_backend.  The hash key is
      computed from user_profile_id and realm_id when both are given;
      otherwise the user is looked up by email.
    * otherwise, with settings.ENABLE_GRAVATAR: a secure.gravatar.com URL
      for `email`, with a size parameter when `medium` is set.
    * otherwise: settings.DEFAULT_AVATAR_URI with '?x=x' appended.
    """
    if avatar_source == u'U':
        if user_profile_id is None or realm_id is None:
            user_profile = get_user_profile_by_email(email)
            hash_key = user_avatar_path(user_profile)
        else:
            # If we can, avoid doing a database query to fetch user_profile
            hash_key = user_avatar_path_from_ids(user_profile_id, realm_id)
        return upload_backend.get_avatar_url(hash_key, medium=medium)

    if not settings.ENABLE_GRAVATAR:
        return settings.DEFAULT_AVATAR_URI+'?x=x'

    if medium:
        gravitar_query_suffix = "&s=%s" % (MEDIUM_AVATAR_SIZE,)
    else:
        gravitar_query_suffix = ""
    hash_key = gravatar_hash(email)
    return u"https://secure.gravatar.com/avatar/%s?d=identicon%s" % (hash_key, gravitar_query_suffix)
def test_user_avatars(self, mock_get_avatar: mock.Mock, mock_os: mock.Mock) -> None:
    # build_avatar_url must assemble the Slack CDN URL from its parts.
    team_id, slack_user_id, avatar_hash_part = 'T5YFFM2QY', 'U6006P1CN', 'gd41c3c33cbe'
    avatar_url = "https://ca.slack-edge.com/{}-{}-{}".format(team_id, slack_user_id, avatar_hash_part)
    self.assertEqual(build_avatar_url(slack_user_id, team_id, avatar_hash_part), avatar_url)

    # build_avatar's result starts out pointing at the remote URL.
    avatar_list = []  # type: List[Dict[str, Any]]
    timestamp = int(timezone_now().timestamp())
    test_avatar_list = build_avatar(1, 1, 'email', avatar_url, timestamp, avatar_list)
    first_record = test_avatar_list[0]
    self.assertEqual(first_record['path'], avatar_url)
    self.assertEqual(first_record['s3_path'], '')
    self.assertEqual(first_record['user_profile_id'], 1)

    # After processing, the record and its '.original' twin point at files
    # inside the avatar directory.
    avatar_dir = './avatar_dir'
    avatar_list = process_avatars(test_avatar_list, avatar_dir, 3)
    avatar_hash = user_avatar_path_from_ids(1, 3)
    expected_png = '%s/%s.png' % (avatar_dir, avatar_hash)
    expected_original = '%s/%s.original' % (avatar_dir, avatar_hash)

    self.assertEqual(avatar_list[0]['path'], expected_png)
    self.assertEqual(avatar_list[1]['path'], expected_original)
def export_avatars_from_local(realm: Realm, local_dir: Path, output_dir: Path) -> None:
    """
    Export avatar files stored under local_dir for the users of `realm`.

    Every file matching a user's avatar path (any extension) is copied to
    the same relative location under output_dir, and one record per file
    is written to output_dir/records.json.  Besides the realm's users,
    the notification, email gateway and welcome bots are included; users
    with a gravatar avatar are skipped.
    """
    system_bots = [
        get_system_bot(settings.NOTIFICATION_BOT),
        get_system_bot(settings.EMAIL_GATEWAY_BOT),
        get_system_bot(settings.WELCOME_BOT),
    ]
    users = list(UserProfile.objects.filter(realm=realm))
    users += system_bots

    count = 0
    records = []
    for user in users:
        has_stored_avatar = user.avatar_source != UserProfile.AVATAR_FROM_GRAVATAR
        if not has_stored_avatar:
            continue

        avatar_path = user_avatar_path_from_ids(user.id, realm.id)
        wildcard = os.path.join(local_dir, avatar_path + '.*')

        for source_path in glob.glob(wildcard):
            logging.info('Copying avatar file for user %s from %s' % (user.email, source_path))
            fn = os.path.relpath(source_path, local_dir)
            target_path = os.path.join(output_dir, fn)
            target_dir = str(os.path.dirname(target_path))
            os.makedirs(target_dir, exist_ok=True)
            copy_command = ["cp", "-a", str(source_path), str(target_path)]
            subprocess.check_call(copy_command)

            source_stat = os.stat(source_path)
            record = dict(
                realm_id=realm.id,
                user_profile_id=user.id,
                user_profile_email=user.email,
                s3_path=fn,
                path=fn,
                size=source_stat.st_size,
                last_modified=source_stat.st_mtime,
                content_type=None,
            )
            records.append(record)

            count += 1
            # Progress message after every 100 copied files.
            if not count % 100:
                logging.info("Finished %s" % (count,))

    with open(os.path.join(output_dir, "records.json"), "w") as records_file:
        ujson.dump(records, records_file, indent=4)
def test_user_avatars(self, mock_get_avatar: mock.Mock, mock_os: mock.Mock) -> None:
    # The avatar URL is the Slack CDN host followed by team, user and hash.
    url_parts = ('T5YFFM2QY', 'U6006P1CN', 'gd41c3c33cbe')
    avatar_url = "https://ca.slack-edge.com/{}-{}-{}".format(*url_parts)
    self.assertEqual(build_avatar_url('U6006P1CN', 'T5YFFM2QY', 'gd41c3c33cbe'), avatar_url)

    # build_avatar is given avatar_list to fill; the new record still
    # points at the remote URL.
    avatar_list = []  # type: List[Dict[str, Any]]
    timestamp = int(timezone_now().timestamp())
    build_avatar(1, 1, 'email', avatar_url, timestamp, avatar_list)
    built_record = avatar_list[0]
    self.assertEqual(built_record['path'], avatar_url)
    self.assertEqual(built_record['s3_path'], '')
    self.assertEqual(built_record['user_profile_id'], 1)

    # process_avatars returns the '.png' record followed by its
    # '.original' counterpart.
    avatar_list = process_avatars(avatar_list, './avatar_dir', 3)
    path_stem = '%s/%s' % ('./avatar_dir', user_avatar_path_from_ids(1, 3))
    image_path = path_stem + '.png'
    original_image_path = path_stem + '.original'

    self.assertEqual(avatar_list[0]['path'], image_path)
    self.assertEqual(avatar_list[1]['path'], original_image_path)
def _get_unversioned_avatar_url(avatar_source,
                                email=None,
                                realm_id=None,
                                user_profile_id=None,
                                medium=False):
    # type: (Text, Text, Optional[int], Optional[int], bool) -> Text
    """
    Compute a user's avatar URL.

    Uploaded avatars (avatar_source u'U') are served through
    upload_backend; the hash key is derived from the IDs when both
    user_profile_id and realm_id are available, and from the user profile
    fetched by email otherwise.  All other sources use gravatar when
    settings.ENABLE_GRAVATAR is set, and settings.DEFAULT_AVATAR_URI
    (with '?x=x' appended) when it is not.
    """
    if avatar_source != u'U':
        if settings.ENABLE_GRAVATAR:
            # Medium avatars request an explicit size from gravatar.
            gravitar_query_suffix = "&s=%s" % (MEDIUM_AVATAR_SIZE, ) if medium else ""
            hash_key = gravatar_hash(email)
            return u"https://secure.gravatar.com/avatar/%s?d=identicon%s" % (
                hash_key, gravitar_query_suffix)
        return settings.DEFAULT_AVATAR_URI + '?x=x'

    ids_available = user_profile_id is not None and realm_id is not None
    if ids_available:
        # If we can, avoid doing a database query to fetch user_profile
        hash_key = user_avatar_path_from_ids(user_profile_id, realm_id)
    else:
        hash_key = user_avatar_path(get_user_profile_by_email(email))
    return upload_backend.get_avatar_url(hash_key, medium=medium)
def import_uploads(import_dir: Path, processing_avatars: bool=False,
                   processing_emojis: bool=False) -> None:
    """
    Import the files listed in import_dir/records.json.

    Depending on the flags the records are treated as avatars, as realm
    emoji, or (neither flag) as uploaded message files; setting both
    flags raises AssertionError.  Files go to S3 when
    settings.LOCAL_UPLOADS_DIR is None and below LOCAL_UPLOADS_DIR
    otherwise.  After an avatar import, the medium-size avatar is
    (re)generated for every '.original' record.
    """
    if processing_avatars and processing_emojis:
        raise AssertionError("Cannot import avatars and emojis at the same time!")

    if processing_avatars:
        logging.info("Importing avatars")
    elif processing_emojis:
        logging.info("Importing emojis")
    else:
        logging.info("Importing uploaded files")

    with open(os.path.join(import_dir, "records.json")) as records_file:
        records = ujson.loads(records_file.read())  # type: List[Dict[str, Any]]
    timestamp = datetime_to_timestamp(timezone_now())

    re_map_foreign_keys_internal(records, 'records', 'realm_id',
                                 related_table="realm", id_field=True)
    if not processing_emojis:
        re_map_foreign_keys_internal(records, 'records', 'user_profile_id',
                                     related_table="user_profile", id_field=True)

    # Avatars and emoji share the avatar bucket / the "avatars" directory.
    uses_avatar_storage = processing_avatars or processing_emojis
    s3_uploads = settings.LOCAL_UPLOADS_DIR is None

    if s3_uploads:
        bucket_name = (settings.S3_AVATAR_BUCKET if uses_avatar_storage
                       else settings.S3_AUTH_UPLOADS_BUCKET)
        conn = S3Connection(settings.S3_KEY, settings.S3_SECRET_KEY)
        bucket = conn.get_bucket(bucket_name, validate=True)

    for count, record in enumerate(records, 1):
        if count % 1000 == 0:
            logging.info("Processed %s/%s uploads" % (count, len(records)))

        if processing_avatars:
            # For avatars, we need to rehash the user ID with the
            # new server's avatar salt
            relative_path = user_avatar_path_from_ids(record['user_profile_id'],
                                                      record['realm_id'])
            is_original = record['s3_path'].endswith('.original')
            relative_path += '.original' if is_original else '.png'
        elif processing_emojis:
            # For emojis we follow the function 'upload_emoji_image'
            relative_path = RealmEmoji.PATH_ID_TEMPLATE.format(
                realm_id=record['realm_id'],
                emoji_file_name=record['file_name'])
            record['last_modified'] = timestamp
        else:
            # Should be kept in sync with its equivalent in zerver/lib/uploads in the
            # function 'upload_message_file'
            path_parts = [
                str(record['realm_id']),
                random_name(18),
                sanitize_name(os.path.basename(record['path'])),
            ]
            relative_path = "/".join(path_parts)
            path_maps['attachment_path'][record['s3_path']] = relative_path

        if not s3_uploads:
            storage_subdir = "avatars" if uses_avatar_storage else "files"
            file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, storage_subdir, relative_path)
            orig_file_path = os.path.join(import_dir, record['path'])
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            shutil.copy(orig_file_path, file_path)
            continue

        key = Key(bucket)
        key.key = relative_path

        # Exported custom emoji from tools like Slack don't have
        # the data for what user uploaded them in `user_profile_id`.
        if not processing_emojis:
            user_profile_id = int(record['user_profile_id'])
            # Support email gateway bot and other cross-realm messages
            if user_profile_id in ID_MAP["user_profile"]:
                logging.info("Uploaded by ID mapped user: %s!" % (user_profile_id,))
                user_profile_id = ID_MAP["user_profile"][user_profile_id]
            user_profile = get_user_profile_by_id(user_profile_id)
            key.set_metadata("user_profile_id", str(user_profile.id))

        if 'last_modified' in record:
            key.set_metadata("orig_last_modified", record['last_modified'])
        key.set_metadata("realm_id", str(record['realm_id']))

        # Zulip exports will always have a content-type, but third-party exports might not.
        content_type = record.get("content_type")
        if content_type is None:
            content_type = guess_type(record['s3_path'])[0]
        headers = {'Content-Type': content_type}  # type: Dict[str, Any]

        key.set_contents_from_filename(os.path.join(import_dir, record['path']),
                                       headers=headers)

    if processing_avatars:
        from zerver.lib.upload import upload_backend
        # Ensure that we have medium-size avatar images for every
        # avatar.  TODO: This implementation is hacky, both in that it
        # does get_user_profile_by_id for each user, and in that it
        # might be better to require the export to just have these.
        for record in records:
            if not record['s3_path'].endswith('.original'):
                continue
            user_profile = get_user_profile_by_id(record['user_profile_id'])
            if settings.LOCAL_UPLOADS_DIR is not None:
                avatar_path = user_avatar_path_from_ids(user_profile.id, record['realm_id'])
                medium_file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars",
                                                avatar_path) + '-medium.png'
                if os.path.exists(medium_file_path):
                    # We remove the image here primarily to deal with
                    # issues when running the import script multiple
                    # times in development (where one might reuse the
                    # same realm ID from a previous iteration).
                    os.remove(medium_file_path)
            upload_backend.ensure_medium_avatar_image(user_profile=user_profile)
def import_uploads_s3(bucket_name: str, import_dir: Path, processing_avatars: bool = False,
                      processing_emojis: bool = False) -> None:
    """
    Upload the files listed in import_dir/records.json to an S3 bucket.

    bucket_name: bucket the files are uploaded to.
    import_dir: directory holding records.json and the files it lists.
    processing_avatars: treat the records as avatars; the key is the
        avatar path computed for the (remapped) user and realm IDs, and a
        medium-size avatar is ensured after each upload.
    processing_emojis: treat the records as realm emoji; the key follows
        RealmEmoji.PATH_ID_TEMPLATE.

    With neither flag set the records are message attachments: a fresh
    random key is generated and remembered in path_maps['attachment_path'].
    """
    upload_backend = S3UploadBackend()
    conn = S3Connection(settings.S3_KEY, settings.S3_SECRET_KEY)
    bucket = conn.get_bucket(bucket_name, validate=True)

    records_filename = os.path.join(import_dir, "records.json")
    with open(records_filename) as records_file:
        records = ujson.loads(records_file.read())

    re_map_foreign_keys_internal(records, 'records', 'realm_id',
                                 related_table="realm", id_field=True)
    re_map_foreign_keys_internal(records, 'records', 'user_profile_id',
                                 related_table="user_profile", id_field=True)

    for record in records:
        key = Key(bucket)

        # The three cases are mutually exclusive.  Previously the avatar
        # case fell through into the attachment branch, which replaced
        # the avatar key with a random attachment key.  The emoji check
        # comes first so that it still wins if both flags are set, as
        # before.
        if processing_emojis:
            # For emojis we follow the function 'upload_emoji_image'
            emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format(
                realm_id=record['realm_id'],
                emoji_file_name=record['file_name'])
            key.key = emoji_path
        elif processing_avatars:
            # For avatars, we need to rehash the user's email with the
            # new server's avatar salt
            avatar_path = user_avatar_path_from_ids(record['user_profile_id'],
                                                    record['realm_id'])
            key.key = avatar_path
            if record['s3_path'].endswith('.original'):
                key.key += '.original'
        else:
            # Should be kept in sync with its equivalent in zerver/lib/uploads in the
            # function 'upload_message_image'
            s3_file_name = "/".join([
                str(record['realm_id']),
                random_name(18),
                sanitize_name(os.path.basename(record['path']))
            ])
            key.key = s3_file_name
            path_maps['attachment_path'][record['path']] = s3_file_name

        user_profile_id = int(record['user_profile_id'])
        # Support email gateway bot and other cross-realm messages
        if user_profile_id in id_maps["user_profile"]:
            logging.info("Uploaded by ID mapped user: %s!" % (user_profile_id, ))
            user_profile_id = id_maps["user_profile"][user_profile_id]
        user_profile = get_user_profile_by_id(user_profile_id)
        key.set_metadata("user_profile_id", str(user_profile.id))
        key.set_metadata("realm_id", str(user_profile.realm_id))
        key.set_metadata("orig_last_modified", record['last_modified'])

        headers = {'Content-Type': record['content_type']}

        key.set_contents_from_filename(os.path.join(import_dir, record['path']),
                                       headers=headers)

        if processing_avatars:
            # TODO: Ideally, we'd do this in a separate pass, after
            # all the avatars have been uploaded, since we may end up
            # unnecessarily resizing images just before the medium-size
            # image in the export is uploaded.  See the local uploads
            # code path for more notes.
            upload_backend.ensure_medium_avatar_image(
                user_profile=user_profile)
def import_uploads_local(import_dir: Path, processing_avatars: bool = False,
                         processing_emojis: bool = False) -> None:
    """
    Copy the files listed in import_dir/records.json into
    settings.LOCAL_UPLOADS_DIR.

    import_dir: directory holding records.json and the files it lists.
    processing_avatars: treat the records as avatars; files go to
        "avatars/<avatar path>" with a '.original' or '.png' suffix, and
        afterwards the medium-size avatar is regenerated for every
        '.original' record.
    processing_emojis: treat the records as realm emoji; files go below
        "avatars/" following RealmEmoji.PATH_ID_TEMPLATE.

    With neither flag set the records are message attachments: they are
    stored below "files/" under a fresh random name, which is remembered
    in path_maps['attachment_path'].
    """
    records_filename = os.path.join(import_dir, "records.json")
    with open(records_filename) as records_file:
        records = ujson.loads(records_file.read())

    re_map_foreign_keys_internal(records, 'records', 'realm_id',
                                 related_table="realm", id_field=True)
    if not processing_emojis:
        re_map_foreign_keys_internal(records, 'records', 'user_profile_id',
                                     related_table="user_profile", id_field=True)

    for record in records:
        if processing_avatars:
            # For avatars, we need to rehash the user ID with the
            # new server's avatar salt
            avatar_path = user_avatar_path_from_ids(record['user_profile_id'],
                                                    record['realm_id'])
            file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars", avatar_path)
            if record['s3_path'].endswith('.original'):
                file_path += '.original'
            else:
                file_path += '.png'
        elif processing_emojis:
            # For emojis we follow the function 'upload_emoji_image'
            emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format(
                realm_id=record['realm_id'],
                emoji_file_name=record['file_name'])
            file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars", emoji_path)
        else:
            # Should be kept in sync with its equivalent in zerver/lib/uploads in the
            # function 'upload_message_image'
            s3_file_name = "/".join([
                str(record['realm_id']),
                random_name(18),
                sanitize_name(os.path.basename(record['path']))
            ])
            file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "files", s3_file_name)
            path_maps['attachment_path'][record['path']] = s3_file_name

        orig_file_path = os.path.join(import_dir, record['path'])
        # Create the target directory in-process: no `mkdir -p` subprocess
        # per record and no gap between an existence check and the creation.
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        shutil.copy(orig_file_path, file_path)

    if processing_avatars:
        # Ensure that we have medium-size avatar images for every
        # avatar.  TODO: This implementation is hacky, both in that it
        # does get_user_profile_by_id for each user, and in that it
        # might be better to require the export to just have these.
        upload_backend = LocalUploadBackend()
        for record in records:
            if record['s3_path'].endswith('.original'):
                user_profile = get_user_profile_by_id(
                    record['user_profile_id'])
                avatar_path = user_avatar_path_from_ids(
                    user_profile.id, record['realm_id'])
                medium_file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars",
                                                avatar_path) + '-medium.png'
                if os.path.exists(medium_file_path):
                    # We remove the image here primarily to deal with
                    # issues when running the import script multiple
                    # times in development (where one might reuse the
                    # same realm ID from a previous iteration).
                    os.remove(medium_file_path)
                upload_backend.ensure_medium_avatar_image(
                    user_profile=user_profile)
def export_files_from_s3(realm: Realm, bucket_name: str, output_dir: Path,
                         processing_avatars: bool=False) -> None:
    """
    Download a realm's files from an S3 bucket into output_dir and write
    a records.json describing them.

    realm: realm whose files are exported.
    bucket_name: bucket to read from.
    output_dir: directory receiving the files and records.json.
    processing_avatars: when True, the whole bucket is listed and only
        keys matching the avatar paths of the realm's users are exported,
        each stored at output_dir/<key name>.  When False, keys below
        "<realm id>/" are exported; each must have the form a/b/c and is
        stored at output_dir/b/c.

    Raises AssertionError when a key's metadata is missing or
    inconsistent with the realm, or when a non-avatar key does not have
    exactly three path components.
    """
    conn = S3Connection(settings.S3_KEY, settings.S3_SECRET_KEY)
    bucket = conn.get_bucket(bucket_name, validate=True)
    records = []

    logging.info("Downloading uploaded files from %s" % (bucket_name))

    avatar_hash_values = set()
    user_ids = set()
    if processing_avatars:
        bucket_list = bucket.list()
        for user_profile in UserProfile.objects.filter(realm=realm):
            avatar_path = user_avatar_path_from_ids(user_profile.id, realm.id)
            avatar_hash_values.add(avatar_path)
            avatar_hash_values.add(avatar_path + ".original")
            user_ids.add(user_profile.id)
    else:
        bucket_list = bucket.list(prefix="%s/" % (realm.id,))

    if settings.EMAIL_GATEWAY_BOT is not None:
        email_gateway_bot = get_system_bot(settings.EMAIL_GATEWAY_BOT)  # type: Optional[UserProfile]
    else:
        email_gateway_bot = None

    count = 0
    for bkey in bucket_list:
        if processing_avatars and bkey.name not in avatar_hash_values:
            continue
        key = bucket.get_key(bkey.name)

        # This can happen if an email address has moved realms
        if 'realm_id' in key.metadata and key.metadata['realm_id'] != str(realm.id):
            if email_gateway_bot is None or key.metadata['user_profile_id'] != str(email_gateway_bot.id):
                raise AssertionError("Key metadata problem: %s %s / %s" % (key.name, key.metadata, realm.id))
            # Email gateway bot sends messages, potentially including attachments, cross-realm.
            print("File uploaded by email gateway bot: %s / %s" % (key.name, key.metadata))
        elif processing_avatars:
            if 'user_profile_id' not in key.metadata:
                raise AssertionError("Missing user_profile_id in key metadata: %s" % (key.metadata,))
            if int(key.metadata['user_profile_id']) not in user_ids:
                raise AssertionError("Wrong user_profile_id in key metadata: %s" % (key.metadata,))
        elif 'realm_id' not in key.metadata:
            raise AssertionError("Missing realm_id in key metadata: %s" % (key.metadata,))

        record = dict(s3_path=key.name, bucket=bucket_name,
                      size=key.size, last_modified=key.last_modified,
                      content_type=key.content_type, md5=key.md5)
        record.update(key.metadata)

        # A few early avatars don't have 'realm_id' on the object; fix their metadata
        user_profile = get_user_profile_by_id(record['user_profile_id'])
        if 'realm_id' not in record:
            record['realm_id'] = user_profile.realm_id
        record['user_profile_email'] = user_profile.email

        if processing_avatars:
            filename = os.path.join(output_dir, key.name)
            record['path'] = key.name
        else:
            fields = key.name.split('/')
            if len(fields) != 3:
                raise AssertionError("Suspicious key with invalid format %s" % (key.name))
            filename = os.path.join(output_dir, fields[1], fields[2])
            record['path'] = os.path.join(fields[1], fields[2])

        # Create the directory that will actually contain the file.  For
        # avatars the key name may itself contain a subdirectory, so
        # creating only output_dir is not sufficient.
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        key.get_contents_to_filename(filename)

        records.append(record)
        count += 1

        if (count % 100 == 0):
            logging.info("Finished %s" % (count,))

    with open(os.path.join(output_dir, "records.json"), "w") as records_file:
        ujson.dump(records, records_file, indent=4)
def import_uploads_s3(bucket_name: str, import_dir: Path, processing_avatars: bool=False,
                      processing_emojis: bool=False) -> None:
    """Upload the files listed in ``import_dir/records.json`` to an S3 bucket.

    The ``realm_id`` and ``user_profile_id`` fields of every record are
    first remapped to this server's IDs.  Each record is then uploaded under
    a key chosen by mode:

    * ``processing_avatars``: the user's avatar path on this server (with
      ``.original`` appended when the exported key ended in ``.original``);
    * ``processing_emojis``: ``RealmEmoji.PATH_ID_TEMPLATE`` filled in from
      the record;
    * otherwise: ``<realm_id>/<random name>/<sanitized file name>``, which is
      also recorded in ``path_maps['attachment_path']``.

    The three modes are mutually exclusive; if both flags are passed,
    avatar handling takes precedence.
    """
    upload_backend = S3UploadBackend()
    conn = S3Connection(settings.S3_KEY, settings.S3_SECRET_KEY)
    bucket = conn.get_bucket(bucket_name, validate=True)

    records_filename = os.path.join(import_dir, "records.json")
    with open(records_filename) as records_file:
        records = ujson.loads(records_file.read())

    re_map_foreign_keys_internal(records, 'records', 'realm_id',
                                 related_table="realm", id_field=True)
    re_map_foreign_keys_internal(records, 'records', 'user_profile_id',
                                 related_table="user_profile", id_field=True)
    for record in records:
        key = Key(bucket)

        if processing_avatars:
            # For avatars, we need to rehash the user's email with the
            # new server's avatar salt
            avatar_path = user_avatar_path_from_ids(record['user_profile_id'], record['realm_id'])
            key.key = avatar_path
            if record['s3_path'].endswith('.original'):
                key.key += '.original'
        elif processing_emojis:
            # This must be `elif`: with a plain `if`, avatar records fell
            # through to the `else` branch below, which replaced the avatar
            # key with a random attachment path and polluted path_maps.
            # For emojis we follow the function 'upload_emoji_image'
            emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format(
                realm_id=record['realm_id'],
                emoji_file_name=record['file_name'])
            key.key = emoji_path
        else:
            # Should be kept in sync with its equivalent in zerver/lib/uploads in the
            # function 'upload_message_image'
            s3_file_name = "/".join([
                str(record['realm_id']),
                random_name(18),
                sanitize_name(os.path.basename(record['path']))
            ])
            key.key = s3_file_name
            path_maps['attachment_path'][record['path']] = s3_file_name

        user_profile_id = int(record['user_profile_id'])
        # Support email gateway bot and other cross-realm messages
        if user_profile_id in id_maps["user_profile"]:
            logging.info("Uploaded by ID mapped user: %s!" % (user_profile_id,))
            user_profile_id = id_maps["user_profile"][user_profile_id]
        user_profile = get_user_profile_by_id(user_profile_id)
        key.set_metadata("user_profile_id", str(user_profile.id))
        key.set_metadata("realm_id", str(user_profile.realm_id))
        key.set_metadata("orig_last_modified", record['last_modified'])

        headers = {'Content-Type': record['content_type']}

        key.set_contents_from_filename(os.path.join(import_dir, record['path']), headers=headers)

        if processing_avatars:
            # TODO: Ideally, we'd do this in a separate pass, after
            # all the avatars have been uploaded, since we may end up
            # unnecessarily resizing images just before the medium-size
            # image in the export is uploaded.  See the local uploads
            # code path for more notes.
            upload_backend.ensure_medium_avatar_image(user_profile=user_profile)
def import_uploads_local(import_dir: Path, processing_avatars: bool=False,
                         processing_emojis: bool=False) -> None:
    """Copy the files listed in ``import_dir/records.json`` into LOCAL_UPLOADS_DIR.

    ``realm_id`` is remapped to this server's IDs for every record;
    ``user_profile_id`` is remapped too unless ``processing_emojis`` is set.
    The destination of each file depends on the mode:

    * ``processing_avatars``: ``avatars/<avatar path>`` plus ``.original``
      or ``.png`` depending on the exported key's suffix;
    * ``processing_emojis``: ``avatars/<RealmEmoji.PATH_ID_TEMPLATE path>``;
    * otherwise: ``files/<realm_id>/<random name>/<sanitized file name>``,
      which is also recorded in ``path_maps['attachment_path']``.

    After copying avatars, the medium-size avatar of every user that has a
    ``.original`` record is regenerated.
    """
    records_filename = os.path.join(import_dir, "records.json")
    with open(records_filename) as records_file:
        records = ujson.loads(records_file.read())

    re_map_foreign_keys_internal(records, 'records', 'realm_id',
                                 related_table="realm", id_field=True)
    if not processing_emojis:
        re_map_foreign_keys_internal(records, 'records', 'user_profile_id',
                                     related_table="user_profile", id_field=True)

    for record in records:
        if processing_avatars:
            # For avatars, we need to rehash the user ID with the
            # new server's avatar salt
            avatar_path = user_avatar_path_from_ids(record['user_profile_id'], record['realm_id'])
            file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars", avatar_path)
            if record['s3_path'].endswith('.original'):
                file_path += '.original'
            else:
                file_path += '.png'
        elif processing_emojis:
            # For emojis we follow the function 'upload_emoji_image'
            emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format(
                realm_id=record['realm_id'],
                emoji_file_name=record['file_name'])
            file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars", emoji_path)
        else:
            # Should be kept in sync with its equivalent in zerver/lib/uploads in the
            # function 'upload_message_image'
            s3_file_name = "/".join([
                str(record['realm_id']),
                random_name(18),
                sanitize_name(os.path.basename(record['path']))
            ])
            file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "files", s3_file_name)
            path_maps['attachment_path'][record['path']] = s3_file_name

        orig_file_path = os.path.join(import_dir, record['path'])
        # Create the destination directory in-process rather than shelling
        # out to `mkdir -p`; exist_ok avoids the check-then-create race.
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        shutil.copy(orig_file_path, file_path)

    if processing_avatars:
        # Ensure that we have medium-size avatar images for every
        # avatar.  TODO: This implementation is hacky, both in that it
        # does get_user_profile_by_id for each user, and in that it
        # might be better to require the export to just have these.
        upload_backend = LocalUploadBackend()
        for record in records:
            if record['s3_path'].endswith('.original'):
                user_profile = get_user_profile_by_id(record['user_profile_id'])
                avatar_path = user_avatar_path_from_ids(user_profile.id, record['realm_id'])
                medium_file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars",
                                                avatar_path) + '-medium.png'
                if os.path.exists(medium_file_path):
                    # We remove the image here primarily to deal with
                    # issues when running the import script multiple
                    # times in development (where one might reuse the
                    # same realm ID from a previous iteration).
                    os.remove(medium_file_path)
                upload_backend.ensure_medium_avatar_image(user_profile=user_profile)
def import_uploads_s3(bucket_name: str, import_dir: Path, processing_avatars: bool=False,
                      processing_emojis: bool=False) -> None:
    """Upload the files listed in ``import_dir/records.json`` to an S3 bucket.

    ``realm_id`` is remapped to this server's IDs for every record;
    ``user_profile_id`` is remapped too unless ``processing_emojis`` is set.
    Each record is uploaded under a key chosen by mode:

    * ``processing_avatars``: the user's avatar path on this server (with
      ``.original`` appended when the exported key ended in ``.original``);
    * ``processing_emojis``: ``RealmEmoji.PATH_ID_TEMPLATE`` filled in from
      the record; ``last_modified`` is set to the import time;
    * otherwise: ``<realm_id>/<random name>/<sanitized file name>``, which is
      also recorded in ``path_maps['attachment_path']``.

    After uploading avatars, the medium-size avatar of every user that has a
    ``.original`` record is generated.
    """
    upload_backend = S3UploadBackend()
    conn = S3Connection(settings.S3_KEY, settings.S3_SECRET_KEY)
    bucket = conn.get_bucket(bucket_name, validate=True)

    records_filename = os.path.join(import_dir, "records.json")
    with open(records_filename) as records_file:
        records = ujson.loads(records_file.read())

    re_map_foreign_keys_internal(records, 'records', 'realm_id',
                                 related_table="realm", id_field=True)
    timestamp = datetime_to_timestamp(timezone_now())
    if not processing_emojis:
        re_map_foreign_keys_internal(records, 'records', 'user_profile_id',
                                     related_table="user_profile", id_field=True)

    for record in records:
        key = Key(bucket)

        if processing_avatars:
            # For avatars, we need to rehash the user's email with the
            # new server's avatar salt
            avatar_path = user_avatar_path_from_ids(record['user_profile_id'], record['realm_id'])
            key.key = avatar_path
            if record['s3_path'].endswith('.original'):
                key.key += '.original'
        elif processing_emojis:
            # For emojis we follow the function 'upload_emoji_image'
            emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format(
                realm_id=record['realm_id'],
                emoji_file_name=record['file_name'])
            key.key = emoji_path
            record['last_modified'] = timestamp
        else:
            # Should be kept in sync with its equivalent in zerver/lib/uploads in the
            # function 'upload_message_image'
            s3_file_name = "/".join([
                str(record['realm_id']),
                random_name(18),
                sanitize_name(os.path.basename(record['path']))
            ])
            key.key = s3_file_name
            path_maps['attachment_path'][record['s3_path']] = s3_file_name

        # Exported custom emoji from tools like Slack don't have
        # the data for what user uploaded them in `user_profile_id`.
        if not processing_emojis:
            user_profile_id = int(record['user_profile_id'])
            # Support email gateway bot and other cross-realm messages
            if user_profile_id in id_maps["user_profile"]:
                logging.info("Uploaded by ID mapped user: %s!" % (user_profile_id,))
                user_profile_id = id_maps["user_profile"][user_profile_id]
            user_profile = get_user_profile_by_id(user_profile_id)
            key.set_metadata("user_profile_id", str(user_profile.id))

        # For emoji, last_modified is the integer timestamp set above;
        # stringify it like the other metadata values.
        key.set_metadata("orig_last_modified", str(record['last_modified']))
        key.set_metadata("realm_id", str(record['realm_id']))

        # Zulip exports will always have a content-type, but third-party exports might not.
        content_type = record.get("content_type")
        if content_type is None:
            content_type = guess_type(record['s3_path'])[0]
            if content_type is None:
                # guess_type could not tell either; never send a None
                # header, fall back to the generic binary type.
                content_type = 'application/octet-stream'
        headers = {'Content-Type': content_type}

        key.set_contents_from_filename(os.path.join(import_dir, record['path']), headers=headers)

    if processing_avatars:
        # Ensure that we have medium-size avatar images for every
        # avatar.  TODO: This implementation is hacky, both in that it
        # does get_user_profile_by_id for each user, and in that it
        # might be better to require the export to just have these.
        for record in records:
            if record['s3_path'].endswith('.original'):
                user_profile = get_user_profile_by_id(record['user_profile_id'])
                upload_backend.ensure_medium_avatar_image(user_profile=user_profile)
def export_files_from_s3(realm: Realm,
                         bucket_name: str,
                         output_dir: Path,
                         processing_avatars: bool = False) -> None:
    """Export the S3 objects belonging to ``realm`` into ``output_dir``.

    In avatar mode (``processing_avatars=True``) the entire bucket is
    listed and a key is kept only if its name is the avatar path of one of
    the realm's users, optionally followed by ``.original``.  In the default
    mode all keys under the ``"<realm.id>/"`` prefix are kept.

    Every kept key is downloaded below ``output_dir`` and a description of
    it (S3 attributes, key metadata, uploader email, relative path) is
    appended to the list dumped to ``output_dir/records.json``.

    Raises:
        AssertionError: on missing or mismatching ``realm_id`` /
            ``user_profile_id`` metadata (cross-realm uploads by the email
            gateway bot are tolerated), or when a non-avatar key name does
            not split into exactly three ``/``-separated fields.
    """
    conn = S3Connection(settings.S3_KEY, settings.S3_SECRET_KEY)
    bucket = conn.get_bucket(bucket_name, validate=True)
    records = []

    logging.info("Downloading uploaded files from %s" % (bucket_name))

    avatar_hash_values = set()
    user_ids = set()
    if processing_avatars:
        bucket_list = bucket.list()
        for user_profile in UserProfile.objects.filter(realm=realm):
            avatar_path = user_avatar_path_from_ids(user_profile.id, realm.id)
            avatar_hash_values.add(avatar_path)
            avatar_hash_values.add(avatar_path + ".original")
            user_ids.add(user_profile.id)
    else:
        bucket_list = bucket.list(prefix="%s/" % (realm.id, ))

    if settings.EMAIL_GATEWAY_BOT is not None:
        email_gateway_bot = get_system_bot(
            settings.EMAIL_GATEWAY_BOT)  # type: Optional[UserProfile]
    else:
        email_gateway_bot = None

    count = 0
    for bkey in bucket_list:
        if processing_avatars and bkey.name not in avatar_hash_values:
            continue
        key = bucket.get_key(bkey.name)

        # This can happen if an email address has moved realms
        if 'realm_id' in key.metadata and key.metadata['realm_id'] != str(
                realm.id):
            if email_gateway_bot is None or key.metadata[
                    'user_profile_id'] != str(email_gateway_bot.id):
                raise AssertionError("Key metadata problem: %s %s / %s" %
                                     (key.name, key.metadata, realm.id))
            # Email gateway bot sends messages, potentially including attachments, cross-realm.
            print("File uploaded by email gateway bot: %s / %s" %
                  (key.name, key.metadata))
        elif processing_avatars:
            if 'user_profile_id' not in key.metadata:
                raise AssertionError(
                    "Missing user_profile_id in key metadata: %s" %
                    (key.metadata, ))
            if int(key.metadata['user_profile_id']) not in user_ids:
                raise AssertionError(
                    "Wrong user_profile_id in key metadata: %s" %
                    (key.metadata, ))
        elif 'realm_id' not in key.metadata:
            raise AssertionError("Missing realm_id in key metadata: %s" %
                                 (key.metadata, ))

        record = dict(s3_path=key.name,
                      bucket=bucket_name,
                      size=key.size,
                      last_modified=key.last_modified,
                      content_type=key.content_type,
                      md5=key.md5)
        record.update(key.metadata)

        # A few early avatars don't have 'realm_id' on the object; fix their metadata
        user_profile = get_user_profile_by_id(record['user_profile_id'])
        if 'realm_id' not in record:
            record['realm_id'] = user_profile.realm_id
        record['user_profile_email'] = user_profile.email

        if processing_avatars:
            record['path'] = key.name
        else:
            fields = key.name.split('/')
            if len(fields) != 3:
                raise AssertionError("Suspicious key with invalid format %s" %
                                     (key.name))
            record['path'] = os.path.join(fields[1], fields[2])
        filename = os.path.join(output_dir, record['path'])

        # Make sure the file's own parent directory exists.  In avatar mode
        # the key name is used verbatim as the relative path, so it may
        # contain '/' components that creating output_dir alone would miss.
        parent_dir = os.path.dirname(filename)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        key.get_contents_to_filename(filename)

        records.append(record)
        count += 1

        if (count % 100 == 0):
            logging.info("Finished %s" % (count, ))

    with open(os.path.join(output_dir, "records.json"), "w") as records_file:
        ujson.dump(records, records_file, indent=4)