def write_upload_data(output_dir: str, realm_id: int) -> None:
    """Emit empty uploads/attachment records for a realm with no uploads.

    Creates the per-realm uploads directory (left empty) and writes
    '/uploads/records.json' and '/attachment.json' under output_dir.
    """
    # The per-realm uploads directory must exist even though nothing is
    # copied into it for this converter.
    realm_uploads_dir = os.path.join(output_dir, 'uploads', str(realm_id))
    os.makedirs(realm_uploads_dir, exist_ok=True)

    empty_upload_records = []  # type: List[ZerverFieldsT]
    empty_attachments = []  # type: List[ZerverFieldsT]

    create_converted_data_files(empty_upload_records, output_dir, '/uploads/records.json')
    create_converted_data_files({"zerver_attachment": empty_attachments},
                                output_dir, '/attachment.json')
def do_convert_data(gitter_data_file: str, output_dir: str, threads: int=6) -> None:
    """Convert a Gitter JSON export into Zulip's import format.

    Reads the export at gitter_data_file, writes the converted realm,
    avatar, and message data under output_dir, and packs everything
    into '<output_dir>.tar.gz'.

    Raises Exception if output_dir is not empty.
    """
    # Subdomain is set by the user while running the import commands
    realm_subdomain = ""
    # Bug fix: realm_id was previously never assigned in this function,
    # which raised a NameError at the avatar-processing step below.
    realm_id = 0
    domain_name = settings.EXTERNAL_HOST

    os.makedirs(output_dir, exist_ok=True)
    # output directory should be empty initially
    if os.listdir(output_dir):
        raise Exception("Output directory should be empty!")

    # Read data from the gitter file
    with open(gitter_data_file, "r") as fp:
        gitter_data = ujson.load(fp)

    realm, avatar_list, user_map = gitter_workspace_to_realm(
        domain_name, gitter_data, realm_subdomain)

    subscriber_map = make_subscriber_map(
        zerver_subscription=realm['zerver_subscription'],
    )

    # Map short names to full names so @-mentions can be rewritten
    # during message conversion.
    user_short_name_to_full_name = {}
    for userprofile in realm['zerver_userprofile']:
        user_short_name_to_full_name[userprofile['short_name']] = userprofile['full_name']

    convert_gitter_workspace_messages(
        gitter_data, output_dir, subscriber_map, user_map,
        user_short_name_to_full_name)

    avatar_folder = os.path.join(output_dir, 'avatars')
    avatar_realm_folder = os.path.join(avatar_folder, str(realm_id))
    os.makedirs(avatar_realm_folder, exist_ok=True)
    avatar_records = process_avatars(avatar_list, avatar_folder, realm_id, threads)

    # Uploads/attachments records are written out empty for this converter.
    attachment = {"zerver_attachment": []}  # type: Dict[str, List[Any]]

    # IO realm.json
    create_converted_data_files(realm, output_dir, '/realm.json')
    # IO emoji records
    create_converted_data_files([], output_dir, '/emoji/records.json')
    # IO avatar records
    create_converted_data_files(avatar_records, output_dir, '/avatars/records.json')
    # IO uploads records
    create_converted_data_files([], output_dir, '/uploads/records.json')
    # IO attachments records
    create_converted_data_files(attachment, output_dir, '/attachment.json')

    subprocess.check_call(["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P'])

    logging.info('######### DATA CONVERSION FINISHED #########\n')
    # Lazy %-style args avoid eager string formatting.
    logging.info("Zulip data dump created at %s", output_dir)
def write_avatar_data(raw_user_data: List[ZerverFieldsT],
                      output_dir: str,
                      realm_id: int) -> None:
    """Convert raw avatar info and write '/avatars/records.json'.

    Also ensures the per-realm avatars directory exists under output_dir.
    """
    avatars_dir = os.path.join(output_dir, 'avatars')
    # Per-realm subdirectory must exist before avatar records are written.
    os.makedirs(os.path.join(avatars_dir, str(realm_id)), exist_ok=True)

    records = convert_avatar_data(
        avatar_folder=avatars_dir,
        raw_data=raw_user_data,
        realm_id=realm_id,
    )
    create_converted_data_files(records, output_dir, '/avatars/records.json')
def do_convert_data(gitter_data_file: str, output_dir: str, threads: int=6) -> None:
    """Convert a Gitter JSON export into Zulip's import format.

    Reads the export at gitter_data_file, writes the converted realm,
    avatar, and message data under output_dir, and packs everything
    into '<output_dir>.tar.gz'.

    Raises Exception if output_dir is not empty.
    """
    # Subdomain is set by the user while running the import commands
    realm_subdomain = ""
    # Bug fix: realm_id was previously never assigned in this function,
    # which raised a NameError at the avatar-processing step below.
    realm_id = 0
    domain_name = settings.EXTERNAL_HOST

    os.makedirs(output_dir, exist_ok=True)
    # output directory should be empty initially
    if os.listdir(output_dir):
        raise Exception("Output directory should be empty!")

    # Read data from the gitter file; use a context manager so the file
    # handle is closed promptly (it was previously left open).
    with open(gitter_data_file) as fp:
        gitter_data = json.load(fp)

    realm, avatar_list, user_map = gitter_workspace_to_realm(
        domain_name, gitter_data, realm_subdomain)

    # For user mentions
    user_short_name_to_full_name = {}
    for userprofile in realm['zerver_userprofile']:
        user_short_name_to_full_name[userprofile['short_name']] = userprofile['full_name']

    convert_gitter_workspace_messages(
        gitter_data, output_dir, realm['zerver_subscription'], user_map,
        user_short_name_to_full_name)

    avatar_folder = os.path.join(output_dir, 'avatars')
    avatar_realm_folder = os.path.join(avatar_folder, str(realm_id))
    os.makedirs(avatar_realm_folder, exist_ok=True)
    avatar_records = process_avatars(avatar_list, avatar_folder, realm_id, threads)

    # Uploads/attachments records are written out empty for this converter.
    attachment = {"zerver_attachment": []}  # type: Dict[str, List[Any]]

    # IO realm.json
    create_converted_data_files(realm, output_dir, '/realm.json')
    # IO emoji records
    create_converted_data_files([], output_dir, '/emoji/records.json')
    # IO avatar records
    create_converted_data_files(avatar_records, output_dir, '/avatars/records.json')
    # IO uploads records
    create_converted_data_files([], output_dir, '/uploads/records.json')
    # IO attachments records
    create_converted_data_files(attachment, output_dir, '/attachment.json')

    subprocess.check_call(["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P'])

    logging.info('######### DATA CONVERSION FINISHED #########\n')
    # Lazy %-style args avoid eager string formatting.
    logging.info("Zulip data dump created at %s", output_dir)
def process_raw_message_batch(
    realm_id: int,
    raw_messages: List[Dict[str, Any]],
    subscriber_map: Dict[int, Set[int]],
    user_handler: UserHandler,
    is_pm_data: bool,
    output_dir: str,
    zerver_realmemoji: List[ZerverFieldsT],
    total_reactions: List[ZerverFieldsT],
    uploads_list: List[ZerverFieldsT],
    zerver_attachment: List[ZerverFieldsT],
    upload_id_to_upload_data_map: Dict[str, Dict[str, Any]],
) -> None:
    """Convert one batch of raw Rocket.Chat messages to Zulip format.

    Builds zerver_message and zerver_usermessage rows and dumps them to a
    messages-NNNNNN.json file under output_dir.

    Side effects: appends to total_reactions (via build_reactions) and,
    for messages with a "file" key, to uploads_list / zerver_attachment /
    upload_id_to_upload_data_map (via process_message_attachment).
    """

    def fix_mentions(content: str, mention_user_ids: Set[int],
                     rc_channel_mention_data: List[Dict[str, str]]) -> str:
        # Rewrite Rocket.Chat @-mention syntax into Zulip's @**...** syntax.
        # Fix user mentions
        for user_id in mention_user_ids:
            user = user_handler.get_user(user_id=user_id)
            rc_mention = "@{short_name}".format(**user)
            zulip_mention = "@**{full_name}**".format(**user)
            content = content.replace(rc_mention, zulip_mention)

        content = content.replace("@all", "@**all**")
        # We don't have an equivalent for Rocket.Chat's @here mention
        # which mentions all users active in the channel.
        content = content.replace("@here", "@**all**")

        # Fix channel mentions
        for mention_data in rc_channel_mention_data:
            rc_mention = mention_data["rc_mention"]
            zulip_mention = mention_data["zulip_mention"]
            content = content.replace(rc_mention, zulip_mention)

        return content

    # message_id -> ids of users mentioned in that message (consumed by
    # make_user_messages below).
    mention_map: Dict[int, Set[int]] = {}
    zerver_message: List[ZerverFieldsT] = []

    for raw_message in raw_messages:
        message_id = NEXT_ID("message")
        mention_user_ids = raw_message["mention_user_ids"]
        mention_map[message_id] = mention_user_ids

        content = fix_mentions(
            content=raw_message["content"],
            mention_user_ids=mention_user_ids,
            rc_channel_mention_data=raw_message["rc_channel_mention_data"],
        )

        # Skip pathologically long messages rather than importing them.
        if len(content) > 10000:  # nocoverage
            logging.info("skipping too-long message of length %s", len(content))
            continue

        date_sent = raw_message["date_sent"]
        sender_user_id = raw_message["sender_id"]
        recipient_id = raw_message["recipient_id"]

        rendered_content = None

        has_attachment = False
        has_image = False
        has_link = raw_message["has_link"]

        if "file" in raw_message:
            # Message carries an uploaded file; record it and append a
            # link to the upload into the message content.
            has_attachment = True
            has_link = True

            attachment_content, has_image = process_message_attachment(
                upload=raw_message["file"],
                realm_id=realm_id,
                message_id=message_id,
                user_id=sender_user_id,
                user_handler=user_handler,
                uploads_list=uploads_list,
                zerver_attachment=zerver_attachment,
                upload_id_to_upload_data_map=upload_id_to_upload_data_map,
                output_dir=output_dir,
            )

            content += attachment_content

        topic_name = raw_message["topic_name"]

        message = build_message(
            content=content,
            message_id=message_id,
            date_sent=date_sent,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            topic_name=topic_name,
            user_id=sender_user_id,
            has_image=has_image,
            has_link=has_link,
            has_attachment=has_attachment,
        )
        zerver_message.append(message)

        # Accumulates into the caller-owned total_reactions list.
        build_reactions(
            total_reactions=total_reactions,
            reactions=raw_message["reactions"],
            message_id=message_id,
            zerver_realmemoji=zerver_realmemoji,
        )

    zerver_usermessage = make_user_messages(
        zerver_message=zerver_message,
        subscriber_map=subscriber_map,
        is_pm_data=is_pm_data,
        mention_map=mention_map,
    )

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    # Dump-file counter is per-realm, so ids restart for each realm.
    dump_file_id = NEXT_ID("dump_file_id" + str(realm_id))
    message_file = f"/messages-{dump_file_id:06}.json"
    create_converted_data_files(message_json, output_dir, message_file)
def write_info(self, output_dir: str, realm_id: int) -> None: attachments = [] # type: List[Dict[str, Any]] uploads_records = [] # type: List[Dict[str, Any]] def add_attachment(info: Dict[str, Any]) -> None: build_attachment( realm_id=realm_id, message_ids=info['message_ids'], user_id=info['sender_id'], fileinfo=dict( created=info['mtime'], # minor lie size=info['size'], name=info['name'], ), s3_path=info['target_path'], zerver_attachment=attachments, ) def add_upload(info: Dict[str, Any]) -> None: target_path = info['target_path'] upload_rec = dict( size=info['size'], user_profile_id=info['sender_id'], realm_id=realm_id, s3_path=target_path, path=target_path, content_type=None, ) uploads_records.append(upload_rec) def make_full_target_path(info: Dict[str, Any]) -> str: target_path = info['target_path'] full_target_path = os.path.join( output_dir, 'uploads', target_path, ) full_target_path = os.path.abspath(full_target_path) os.makedirs(os.path.dirname(full_target_path), exist_ok=True) return full_target_path def copy_file(info: Dict[str, Any]) -> None: source_path = info['local_fn'] target_path = make_full_target_path(info) shutil.copyfile(source_path, target_path) logging.info('Start processing attachment files') for info in self.info_dict.values(): add_attachment(info) add_upload(info) copy_file(info) uploads_folder = os.path.join(output_dir, 'uploads') os.makedirs(os.path.join(uploads_folder, str(realm_id)), exist_ok=True) attachment = dict( zerver_attachment=attachments ) create_converted_data_files(uploads_records, output_dir, '/uploads/records.json') create_converted_data_files(attachment, output_dir, '/attachment.json') logging.info('Done processing attachment files')
def convert_slack_workspace_messages(slack_data_dir: str, users: List[ZerverFieldsT], realm_id: int,
                                     added_users: AddedUsersT, added_recipient: AddedRecipientsT,
                                     added_channels: AddedChannelsT, realm: ZerverFieldsT,
                                     zerver_realmemoji: List[ZerverFieldsT], domain_name: str,
                                     output_dir: str,
                                     chunk_size: int=MESSAGE_BATCH_CHUNK_SIZE) -> Tuple[List[ZerverFieldsT],
                                                                                        List[ZerverFieldsT],
                                                                                        List[ZerverFieldsT]]:
    """
    Converts all Slack messages to Zulip format, writing them out in
    chunk_size batches as messages-NNNNNN.json files under output_dir.

    Returns:
    1. reactions, which is a list of the reactions
    2. uploads, which is a list of uploads to be mapped in uploads records.json
    3. attachment, which is a list of the attachments
    """
    all_messages = get_all_messages(slack_data_dir, added_channels)

    # we sort the messages according to the timestamp to show messages with
    # the proper date order
    all_messages = sorted(all_messages, key=lambda message: message['ts'])

    logging.info('######### IMPORTING MESSAGES STARTED #########\n')

    total_reactions = []  # type: List[ZerverFieldsT]
    total_attachments = []  # type: List[ZerverFieldsT]
    total_uploads = []  # type: List[ZerverFieldsT]

    # The messages are stored in batches
    low_index = 0
    upper_index = low_index + chunk_size
    dump_file_id = 1

    subscriber_map = make_subscriber_map(
        zerver_subscription=realm['zerver_subscription'],
    )

    # Slice the sorted message list into chunk_size batches; each batch
    # becomes one messages-NNNNNN.json dump file.
    while True:
        message_data = all_messages[low_index:upper_index]
        if len(message_data) == 0:
            break
        zerver_message, zerver_usermessage, attachment, uploads, reactions = \
            channel_message_to_zerver_message(
                realm_id, users, added_users, added_recipient, message_data,
                zerver_realmemoji, subscriber_map, added_channels, domain_name)

        message_json = dict(
            zerver_message=zerver_message,
            zerver_usermessage=zerver_usermessage)

        message_file = "/messages-%06d.json" % (dump_file_id,)
        logging.info("Writing Messages to %s\n" % (output_dir + message_file))
        create_converted_data_files(message_json, output_dir, message_file)

        # Accumulate per-batch results for the caller.
        total_reactions += reactions
        total_attachments += attachment
        total_uploads += uploads

        # Advance the slice window to the next batch.
        low_index = upper_index
        upper_index = chunk_size + low_index
        dump_file_id += 1

    logging.info('######### IMPORTING MESSAGES FINISHED #########\n')
    return total_reactions, total_uploads, total_attachments
def do_convert_data(input_tar_file: str,
                    output_dir: str,
                    masking_content: bool,
                    api_token: Optional[str] = None,
                    slim_mode: bool = False) -> None:
    """Convert a HipChat export tarball into Zulip's import format.

    Untars input_tar_file, converts users, streams, subscriptions, emoji,
    messages, avatars, and attachments into output_dir, then packs the
    result into '<output_dir>.tar.gz'.

    When api_token is provided, stream subscribers come from the API data
    for all streams; otherwise public-stream subscriptions are synthesized
    for all normal users (unless slim_mode skips them).
    """
    input_data_dir = untar_input_file(input_tar_file)

    attachment_handler = AttachmentHandler()
    user_handler = UserHandler()
    subscriber_handler = SubscriberHandler()
    user_id_mapper = IdMapper()
    stream_id_mapper = IdMapper()

    realm_id = 0
    realm = make_realm(realm_id=realm_id)

    # users.json -> UserProfile
    raw_user_data = read_user_data(data_dir=input_data_dir)
    convert_user_data(
        user_handler=user_handler,
        slim_mode=slim_mode,
        user_id_mapper=user_id_mapper,
        raw_data=raw_user_data,
        realm_id=realm_id,
    )
    normal_users = user_handler.get_normal_users()
    # Don't write zerver_userprofile here, because we
    # may add more users later.

    # streams.json -> Stream
    raw_stream_data = read_room_data(data_dir=input_data_dir)
    zerver_stream = convert_room_data(
        raw_data=raw_stream_data,
        subscriber_handler=subscriber_handler,
        stream_id_mapper=stream_id_mapper,
        user_id_mapper=user_id_mapper,
        realm_id=realm_id,
        api_token=api_token,
    )
    realm['zerver_stream'] = zerver_stream

    zerver_recipient = build_recipients(
        zerver_userprofile=normal_users,
        zerver_stream=zerver_stream,
    )
    realm['zerver_recipient'] = zerver_recipient

    if api_token is None:
        # Without API data we only know subscribers for private streams
        # (via subscriber_handler); public streams get everyone subscribed.
        if slim_mode:
            public_stream_subscriptions: List[ZerverFieldsT] = []
        else:
            public_stream_subscriptions = build_public_stream_subscriptions(
                zerver_userprofile=normal_users,
                zerver_recipient=zerver_recipient,
                zerver_stream=zerver_stream,
            )
        private_stream_subscriptions = build_stream_subscriptions(
            get_users=subscriber_handler.get_users,
            zerver_recipient=zerver_recipient,
            zerver_stream=[
                stream_dict for stream_dict in zerver_stream
                if stream_dict['invite_only']
            ],
        )
        stream_subscriptions = public_stream_subscriptions + private_stream_subscriptions
    else:
        # API data gives real subscriber lists for every stream.
        stream_subscriptions = build_stream_subscriptions(
            get_users=subscriber_handler.get_users,
            zerver_recipient=zerver_recipient,
            zerver_stream=zerver_stream,
        )

    personal_subscriptions = build_personal_subscriptions(
        zerver_recipient=zerver_recipient,
    )
    zerver_subscription = personal_subscriptions + stream_subscriptions
    realm['zerver_subscription'] = zerver_subscription

    zerver_realmemoji = write_emoticon_data(
        realm_id=realm_id,
        data_dir=input_data_dir,
        output_dir=output_dir,
    )
    realm['zerver_realmemoji'] = zerver_realmemoji

    subscriber_map = make_subscriber_map(
        zerver_subscription=zerver_subscription,
    )

    logging.info('Start importing message data')
    for message_key in [
        'UserMessage',
        'NotificationMessage',
        'PrivateUserMessage',
    ]:
        write_message_data(
            realm_id=realm_id,
            slim_mode=slim_mode,
            message_key=message_key,
            zerver_recipient=zerver_recipient,
            subscriber_map=subscriber_map,
            data_dir=input_data_dir,
            output_dir=output_dir,
            masking_content=masking_content,
            stream_id_mapper=stream_id_mapper,
            user_id_mapper=user_id_mapper,
            user_handler=user_handler,
            attachment_handler=attachment_handler,
        )

    # Order is important here...don't write users until
    # we process everything else, since we may introduce
    # mirror users when processing messages.
    realm['zerver_userprofile'] = user_handler.get_all_users()
    realm['sort_by_date'] = True

    create_converted_data_files(realm, output_dir, '/realm.json')

    logging.info('Start importing avatar data')
    write_avatar_data(
        raw_user_data=raw_user_data,
        output_dir=output_dir,
        user_id_mapper=user_id_mapper,
        realm_id=realm_id,
    )

    attachment_handler.write_info(
        output_dir=output_dir,
        realm_id=realm_id,
    )

    logging.info('Start making tarball')
    subprocess.check_call(["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P'])
    logging.info('Done making tarball')
def write_info(self, output_dir: str, realm_id: int) -> None:
    """Export attachment metadata and copy attachment files.

    Writes '/uploads/records.json' and '/attachment.json' under
    output_dir and copies every file tracked in self.info_dict into
    the uploads tree.
    """
    attachment_rows = []  # type: List[Dict[str, Any]]
    upload_rows = []  # type: List[Dict[str, Any]]

    def record_attachment(info: Dict[str, Any]) -> None:
        # Appends one row to attachment_rows via build_attachment.
        build_attachment(
            realm_id=realm_id,
            message_ids=info['message_ids'],
            attachment_id=NEXT_ID('attachment'),
            user_id=info['sender_id'],
            fileinfo=dict(
                created=info['mtime'],  # minor lie
                size=info['size'],
                name=info['name'],
            ),
            s3_path=info['target_path'],
            zerver_attachment=attachment_rows,
        )

    def record_upload(info: Dict[str, Any]) -> None:
        # Records where the upload will live inside the export tarball.
        path = info['target_path']
        upload_rows.append(dict(
            size=info['size'],
            user_profile_id=info['sender_id'],
            realm_id=realm_id,
            s3_path=path,
            path=path,
            content_type=None,
            last_modified=None,
        ))

    def destination_for(info: Dict[str, Any]) -> str:
        # Absolute target path; creates parent directories as needed.
        dest = os.path.abspath(
            os.path.join(output_dir, 'uploads', info['target_path'])
        )
        os.makedirs(os.path.dirname(dest), exist_ok=True)
        return dest

    logging.info('Start processing attachment files')

    for info in self.info_dict.values():
        record_attachment(info)
        record_upload(info)
        shutil.copyfile(info['local_fn'], destination_for(info))

    # Ensure the per-realm uploads directory exists even with no uploads.
    os.makedirs(os.path.join(output_dir, 'uploads', str(realm_id)), exist_ok=True)

    create_converted_data_files(upload_rows, output_dir, '/uploads/records.json')
    create_converted_data_files(dict(zerver_attachment=attachment_rows),
                                output_dir, '/attachment.json')

    logging.info('Done processing attachment files')
def process_raw_message_batch(realm_id: int,
                              raw_messages: List[Dict[str, Any]],
                              subscriber_map: Dict[int, Set[int]],
                              user_id_mapper: IdMapper,
                              user_handler: UserHandler,
                              attachment_handler: AttachmentHandler,
                              get_recipient_id: Callable[[ZerverFieldsT], int],
                              is_pm_data: bool,
                              output_dir: str) -> None:
    """Convert one batch of raw HipChat messages to Zulip format.

    Builds zerver_message and zerver_usermessage rows and dumps them to a
    messages-NNNNNN.json file under output_dir.

    Side effects: attachment bookkeeping is accumulated inside
    attachment_handler, and mention data inside the local mention_map.
    """

    def fix_mentions(content: str, mention_user_ids: Set[int]) -> str:
        # Rewrite HipChat @-mention syntax into Zulip's @**...** syntax.
        for user_id in mention_user_ids:
            user = user_handler.get_user(user_id=user_id)
            hipchat_mention = '@{short_name}'.format(**user)
            zulip_mention = '@**{full_name}**'.format(**user)
            content = content.replace(hipchat_mention, zulip_mention)
        content = content.replace('@here', '@**all**')
        return content

    # message_id -> ids of users mentioned in that message (consumed by
    # make_user_messages below).
    mention_map = dict()  # type: Dict[int, Set[int]]

    def make_message(message_id: int, raw_message: ZerverFieldsT) -> ZerverFieldsT:
        # One side effect here:
        # (records the message's mentions into mention_map)
        mention_user_ids = {
            user_id_mapper.get(id)
            for id in set(raw_message['mention_user_ids'])
            if user_id_mapper.has(id)
        }
        mention_map[message_id] = mention_user_ids

        content = fix_mentions(
            content=raw_message['content'],
            mention_user_ids=mention_user_ids,
        )
        pub_date = raw_message['pub_date']
        recipient_id = get_recipient_id(raw_message)
        rendered_content = None

        if is_pm_data:
            topic_name = ''
        else:
            topic_name = 'imported from hipchat'
        user_id = raw_message['sender_id']

        # Another side effect:
        # (registers any attachment with attachment_handler; returns the
        # extra markdown to append to the message body, if any)
        extra_content = attachment_handler.handle_message_data(
            realm_id=realm_id,
            message_id=message_id,
            sender_id=user_id,
            attachment=raw_message['attachment'],
            files_dir=raw_message['files_dir'],
        )

        if extra_content:
            has_attachment = True
            content += '\n' + extra_content
        else:
            has_attachment = False

        return build_message(
            content=content,
            message_id=message_id,
            pub_date=pub_date,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            topic_name=topic_name,
            user_id=user_id,
            has_attachment=has_attachment,
        )

    zerver_message = [
        make_message(
            message_id=NEXT_ID('message'),
            raw_message=raw_message,
        )
        for raw_message in raw_messages
    ]

    zerver_usermessage = make_user_messages(
        zerver_message=zerver_message,
        subscriber_map=subscriber_map,
        is_pm_data=is_pm_data,
        mention_map=mention_map,
    )

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    dump_file_id = NEXT_ID('dump_file_id')
    message_file = "/messages-%06d.json" % (dump_file_id, )
    create_converted_data_files(message_json, output_dir, message_file)
def do_convert_data(slack_zip_file: str, output_dir: str, token: str, threads: int = 6) -> None:
    """Convert a Slack workspace export zip into Zulip's import format.

    Unzips slack_zip_file, fetches supplementary user/emoji/team data from
    the Slack API with `token`, writes all converted data under output_dir,
    and packs the result into '<output_dir>.tar.gz'.

    Raises Exception if output_dir is not empty.
    """
    # Subdomain is set by the user while running the import command
    realm_subdomain = ""
    realm_id = 0
    domain_name = settings.EXTERNAL_HOST

    check_token_access(token)

    # Strip '.zip' to get the extraction directory for the export.
    slack_data_dir = slack_zip_file.replace(".zip", "")
    if not os.path.exists(slack_data_dir):
        os.makedirs(slack_data_dir)

    os.makedirs(output_dir, exist_ok=True)
    if os.listdir(output_dir):
        raise Exception("Output directory should be empty!")

    subprocess.check_call(["unzip", "-q", slack_zip_file, "-d", slack_data_dir])

    # We get the user data from the legacy token method of Slack API, which is deprecated,
    # but we use it as the user email data is provided only in this method
    user_list = get_slack_api_data("https://slack.com/api/users.list", "members", token=token)
    fetch_shared_channel_users(user_list, slack_data_dir, token)

    custom_emoji_list = get_slack_api_data("https://slack.com/api/emoji.list", "emoji", token=token)

    (
        realm,
        slack_user_id_to_zulip_user_id,
        slack_recipient_name_to_zulip_recipient_id,
        added_channels,
        added_mpims,
        dm_members,
        avatar_list,
        emoji_url_map,
    ) = slack_workspace_to_realm(
        domain_name, realm_id, user_list, realm_subdomain, slack_data_dir, custom_emoji_list
    )

    reactions, uploads_list, zerver_attachment = convert_slack_workspace_messages(
        slack_data_dir,
        user_list,
        realm_id,
        slack_user_id_to_zulip_user_id,
        slack_recipient_name_to_zulip_recipient_id,
        added_channels,
        added_mpims,
        dm_members,
        realm,
        realm["zerver_userprofile"],
        realm["zerver_realmemoji"],
        domain_name,
        output_dir,
    )

    # Move zerver_reactions to realm.json file
    realm["zerver_reaction"] = reactions

    emoji_folder = os.path.join(output_dir, "emoji")
    os.makedirs(emoji_folder, exist_ok=True)
    emoji_records = process_emojis(realm["zerver_realmemoji"], emoji_folder, emoji_url_map, threads)

    avatar_folder = os.path.join(output_dir, "avatars")
    avatar_realm_folder = os.path.join(avatar_folder, str(realm_id))
    os.makedirs(avatar_realm_folder, exist_ok=True)
    avatar_records = process_avatars(
        avatar_list, avatar_folder, realm_id, threads, size_url_suffix="-512"
    )

    uploads_folder = os.path.join(output_dir, "uploads")
    os.makedirs(os.path.join(uploads_folder, str(realm_id)), exist_ok=True)
    uploads_records = process_uploads(uploads_list, uploads_folder, threads)
    attachment = {"zerver_attachment": zerver_attachment}

    team_info_dict = get_slack_api_data("https://slack.com/api/team.info", "team", token=token)
    realm_icons_folder = os.path.join(output_dir, "realm_icons")
    realm_icon_records = fetch_team_icons(
        realm["zerver_realm"][0], team_info_dict, realm_icons_folder
    )

    create_converted_data_files(realm, output_dir, "/realm.json")
    create_converted_data_files(emoji_records, output_dir, "/emoji/records.json")
    create_converted_data_files(avatar_records, output_dir, "/avatars/records.json")
    create_converted_data_files(uploads_records, output_dir, "/uploads/records.json")
    create_converted_data_files(attachment, output_dir, "/attachment.json")
    create_converted_data_files(realm_icon_records, output_dir, "/realm_icons/records.json")

    # The extracted zip is no longer needed once conversion is done.
    rm_tree(slack_data_dir)
    subprocess.check_call(["tar", "-czf", output_dir + ".tar.gz", output_dir, "-P"])

    logging.info("######### DATA CONVERSION FINISHED #########\n")
    logging.info("Zulip data dump created at %s", output_dir)
def do_convert_data(slack_zip_file: str, output_dir: str, token: str, threads: int=6) -> None:
    """Convert a Slack workspace export zip into Zulip's import format.

    Unzips slack_zip_file, fetches supplementary user/emoji data from the
    Slack API with `token`, writes all converted data under output_dir,
    and packs the result into '<output_dir>.tar.gz'.

    Raises Exception if output_dir is not empty.
    """
    # Subdomain is set by the user while running the import command
    realm_subdomain = ""
    realm_id = 0
    domain_name = settings.EXTERNAL_HOST

    # Strip '.zip' to get the extraction directory for the export.
    slack_data_dir = slack_zip_file.replace('.zip', '')
    if not os.path.exists(slack_data_dir):
        os.makedirs(slack_data_dir)

    os.makedirs(output_dir, exist_ok=True)
    # output directory should be empty initially
    if os.listdir(output_dir):
        raise Exception('Output directory should be empty!')

    subprocess.check_call(['unzip', '-q', slack_zip_file, '-d', slack_data_dir])

    # We get the user data from the legacy token method of the Slack API,
    # which is deprecated, but we use it as the user email data is
    # provided only in this method.
    user_list = get_slack_api_data(token, "https://slack.com/api/users.list", "members")

    # Get custom emoji from slack api
    custom_emoji_list = get_slack_api_data(token, "https://slack.com/api/emoji.list", "emoji")

    realm, added_users, added_recipient, added_channels, avatar_list, \
        emoji_url_map = slack_workspace_to_realm(domain_name, realm_id, user_list,
                                                 realm_subdomain,
                                                 slack_data_dir, custom_emoji_list)

    reactions, uploads_list, zerver_attachment = convert_slack_workspace_messages(
        slack_data_dir, user_list, realm_id, added_users, added_recipient, added_channels,
        realm, realm['zerver_userprofile'], realm['zerver_realmemoji'], domain_name,
        output_dir)

    # Move zerver_reactions to realm.json file
    realm['zerver_reaction'] = reactions

    emoji_folder = os.path.join(output_dir, 'emoji')
    os.makedirs(emoji_folder, exist_ok=True)
    emoji_records = process_emojis(realm['zerver_realmemoji'], emoji_folder,
                                   emoji_url_map, threads)

    avatar_folder = os.path.join(output_dir, 'avatars')
    avatar_realm_folder = os.path.join(avatar_folder, str(realm_id))
    os.makedirs(avatar_realm_folder, exist_ok=True)
    avatar_records = process_avatars(avatar_list, avatar_folder, realm_id,
                                     threads, size_url_suffix='-512')

    uploads_folder = os.path.join(output_dir, 'uploads')
    os.makedirs(os.path.join(uploads_folder, str(realm_id)), exist_ok=True)
    uploads_records = process_uploads(uploads_list, uploads_folder, threads)
    attachment = {"zerver_attachment": zerver_attachment}

    # IO realm.json
    create_converted_data_files(realm, output_dir, '/realm.json')
    # IO emoji records
    create_converted_data_files(emoji_records, output_dir, '/emoji/records.json')
    # IO avatar records
    create_converted_data_files(avatar_records, output_dir, '/avatars/records.json')
    # IO uploads records
    create_converted_data_files(uploads_records, output_dir, '/uploads/records.json')
    # IO attachments records
    create_converted_data_files(attachment, output_dir, '/attachment.json')

    # remove slack dir
    rm_tree(slack_data_dir)
    subprocess.check_call(["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P'])

    logging.info('######### DATA CONVERSION FINISHED #########\n')
    # Lazy %-style args avoid eager string formatting.
    logging.info("Zulip data dump created at %s", output_dir)
def convert_slack_workspace_messages(slack_data_dir: str, users: List[ZerverFieldsT], realm_id: int,
                                     added_users: AddedUsersT, added_recipient: AddedRecipientsT,
                                     added_channels: AddedChannelsT, realm: ZerverFieldsT,
                                     zerver_userprofile: List[ZerverFieldsT],
                                     zerver_realmemoji: List[ZerverFieldsT], domain_name: str,
                                     output_dir: str,
                                     chunk_size: int=MESSAGE_BATCH_CHUNK_SIZE) -> Tuple[List[ZerverFieldsT],
                                                                                        List[ZerverFieldsT],
                                                                                        List[ZerverFieldsT]]:
    """
    Converts all Slack messages to Zulip format, writing them out in
    chunk_size batches as messages-NNNNNN.json files under output_dir.

    Returns:
    1. reactions, which is a list of the reactions
    2. uploads, which is a list of uploads to be mapped in uploads records.json
    3. attachment, which is a list of the attachments
    """

    long_term_idle = process_long_term_idle_users(slack_data_dir, users, added_users,
                                                  added_channels, zerver_userprofile)

    # Now, we actually import the messages.
    all_messages = get_messages_iterator(slack_data_dir, added_channels)
    logging.info('######### IMPORTING MESSAGES STARTED #########\n')

    total_reactions = []  # type: List[ZerverFieldsT]
    total_attachments = []  # type: List[ZerverFieldsT]
    total_uploads = []  # type: List[ZerverFieldsT]

    # The messages are stored in batches
    dump_file_id = 1

    subscriber_map = make_subscriber_map(
        zerver_subscription=realm['zerver_subscription'],
    )

    while True:
        # Pull the next chunk_size messages from the iterator; a short
        # (or empty) final batch signals the end of the data.
        message_data = []
        _counter = 0
        for msg in all_messages:
            _counter += 1
            message_data.append(msg)
            if _counter == chunk_size:
                break
        if len(message_data) == 0:
            break

        zerver_message, zerver_usermessage, attachment, uploads, reactions = \
            channel_message_to_zerver_message(
                realm_id, users, added_users, added_recipient, message_data,
                zerver_realmemoji, subscriber_map, added_channels, domain_name,
                long_term_idle)

        message_json = dict(
            zerver_message=zerver_message,
            zerver_usermessage=zerver_usermessage)

        message_file = "/messages-%06d.json" % (dump_file_id,)
        logging.info("Writing Messages to %s\n" % (output_dir + message_file))
        create_converted_data_files(message_json, output_dir, message_file)

        # Accumulate per-batch results for the caller.
        total_reactions += reactions
        total_attachments += attachment
        total_uploads += uploads

        dump_file_id += 1

    logging.info('######### IMPORTING MESSAGES FINISHED #########\n')
    return total_reactions, total_uploads, total_attachments
def do_convert_data(input_tar_file: str, output_dir: str) -> None:
    """Convert a HipChat export tarball into Zulip's import format.

    Untars input_tar_file, converts users, streams, subscriptions, emoji,
    messages, avatars, and attachments into output_dir, then packs the
    result into '<output_dir>.tar.gz'.
    """
    input_data_dir = untar_input_file(input_tar_file)

    attachment_handler = AttachmentHandler()
    user_handler = UserHandler()

    realm_id = 0
    realm = make_realm(realm_id=realm_id)

    # users.json -> UserProfile
    raw_user_data = read_user_data(data_dir=input_data_dir)
    convert_user_data(
        user_handler=user_handler,
        raw_data=raw_user_data,
        realm_id=realm_id,
    )
    normal_users = user_handler.get_normal_users()
    # Don't write zerver_userprofile here, because we
    # may add more users later.

    # streams.json -> Stream
    raw_stream_data = read_room_data(data_dir=input_data_dir)
    zerver_stream = convert_room_data(
        raw_data=raw_stream_data,
        realm_id=realm_id,
    )
    realm['zerver_stream'] = zerver_stream

    zerver_recipient = build_recipients(
        zerver_userprofile=normal_users,
        zerver_stream=zerver_stream,
    )
    realm['zerver_recipient'] = zerver_recipient

    zerver_subscription = build_subscriptions(
        zerver_userprofile=normal_users,
        zerver_recipient=zerver_recipient,
        zerver_stream=zerver_stream,
    )
    realm['zerver_subscription'] = zerver_subscription

    zerver_realmemoji = write_emoticon_data(
        realm_id=realm_id,
        data_dir=input_data_dir,
        output_dir=output_dir,
    )
    realm['zerver_realmemoji'] = zerver_realmemoji

    logging.info('Start importing message data')
    for message_key in ['UserMessage', 'PrivateUserMessage']:
        write_message_data(
            realm_id=realm_id,
            message_key=message_key,
            zerver_recipient=zerver_recipient,
            zerver_subscription=zerver_subscription,
            data_dir=input_data_dir,
            output_dir=output_dir,
            user_handler=user_handler,
            attachment_handler=attachment_handler,
        )

    # Order is important here...don't write users until
    # we process everything else, since we may introduce
    # mirror users when processing messages.
    realm['zerver_userprofile'] = user_handler.get_all_users()
    create_converted_data_files(realm, output_dir, '/realm.json')

    logging.info('Start importing avatar data')
    write_avatar_data(
        raw_user_data=raw_user_data,
        output_dir=output_dir,
        realm_id=realm_id,
    )

    attachment_handler.write_info(
        output_dir=output_dir,
        realm_id=realm_id,
    )

    logging.info('Start making tarball')
    subprocess.check_call(["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P'])
    logging.info('Done making tarball')
def do_convert_data(rocketchat_data_dir: str, output_dir: str) -> None:
    """Convert a Rocket.Chat export directory into Zulip's import format.

    Reads all exported collections into memory, converts users, streams,
    huddles, subscriptions, emoji, and messages, then writes the
    converted JSON files and a tarball to ``output_dir``.
    """
    # Get all required exported data in a dictionary
    rocketchat_data = rocketchat_data_to_dict(rocketchat_data_dir)

    # Subdomain is set by the user while running the import command
    realm_subdomain = ""
    realm_id = 0
    domain_name = settings.EXTERNAL_HOST

    realm = make_realm(realm_id, realm_subdomain, domain_name, rocketchat_data["instance"][0])

    user_id_to_user_map: Dict[str, Dict[str, Any]] = map_user_id_to_user(
        rocketchat_data["user"])
    username_to_user_id_map: Dict[str, str] = map_username_to_user_id(
        user_id_to_user_map)

    # Handlers and id-mappers accumulate state across the steps below;
    # the IdMappers translate Rocket.Chat string ids into Zulip int ids.
    user_handler = UserHandler()
    subscriber_handler = SubscriberHandler()
    user_id_mapper = IdMapper()
    stream_id_mapper = IdMapper()
    huddle_id_mapper = IdMapper()

    process_users(
        user_id_to_user_map=user_id_to_user_map,
        realm_id=realm_id,
        domain_name=domain_name,
        user_handler=user_handler,
        user_id_mapper=user_id_mapper,
    )

    # Rocket.Chat stores every kind of conversation in "room"; these
    # maps are filled in by categorize_channels_and_map_with_id.
    room_id_to_room_map: Dict[str, Dict[str, Any]] = {}
    team_id_to_team_map: Dict[str, Dict[str, Any]] = {}
    dsc_id_to_dsc_map: Dict[str, Dict[str, Any]] = {}
    direct_id_to_direct_map: Dict[str, Dict[str, Any]] = {}
    huddle_id_to_huddle_map: Dict[str, Dict[str, Any]] = {}

    categorize_channels_and_map_with_id(
        channel_data=rocketchat_data["room"],
        room_id_to_room_map=room_id_to_room_map,
        team_id_to_team_map=team_id_to_team_map,
        dsc_id_to_dsc_map=dsc_id_to_dsc_map,
        direct_id_to_direct_map=direct_id_to_direct_map,
        huddle_id_to_huddle_map=huddle_id_to_huddle_map,
    )

    zerver_stream = convert_channel_data(
        room_id_to_room_map=room_id_to_room_map,
        team_id_to_team_map=team_id_to_team_map,
        stream_id_mapper=stream_id_mapper,
        realm_id=realm_id,
    )
    realm["zerver_stream"] = zerver_stream

    # Add stream subscription data to `subscriber_handler`
    convert_stream_subscription_data(
        user_id_to_user_map=user_id_to_user_map,
        dsc_id_to_dsc_map=dsc_id_to_dsc_map,
        zerver_stream=zerver_stream,
        stream_id_mapper=stream_id_mapper,
        user_id_mapper=user_id_mapper,
        subscriber_handler=subscriber_handler,
    )

    zerver_huddle = convert_huddle_data(
        huddle_id_to_huddle_map=huddle_id_to_huddle_map,
        huddle_id_mapper=huddle_id_mapper,
        user_id_mapper=user_id_mapper,
        subscriber_handler=subscriber_handler,
    )
    realm["zerver_huddle"] = zerver_huddle

    all_users = user_handler.get_all_users()

    zerver_recipient = build_recipients(
        zerver_userprofile=all_users,
        zerver_stream=zerver_stream,
        zerver_huddle=zerver_huddle,
    )
    realm["zerver_recipient"] = zerver_recipient

    stream_subscriptions = build_stream_subscriptions(
        get_users=subscriber_handler.get_users,
        zerver_recipient=zerver_recipient,
        zerver_stream=zerver_stream,
    )

    huddle_subscriptions = build_huddle_subscriptions(
        get_users=subscriber_handler.get_users,
        zerver_recipient=zerver_recipient,
        zerver_huddle=zerver_huddle,
    )

    personal_subscriptions = build_personal_subscriptions(
        zerver_recipient=zerver_recipient,
    )

    zerver_subscription = personal_subscriptions + stream_subscriptions + huddle_subscriptions
    realm["zerver_subscription"] = zerver_subscription

    zerver_realmemoji = build_custom_emoji(
        realm_id=realm_id,
        custom_emoji_data=rocketchat_data["custom_emoji"],
        output_dir=output_dir,
    )
    realm["zerver_realmemoji"] = zerver_realmemoji

    subscriber_map = make_subscriber_map(
        zerver_subscription=zerver_subscription,
    )

    # Filled in by map_receiver_id_to_recipient_id; used to resolve each
    # message's Zulip recipient below.
    stream_id_to_recipient_id: Dict[int, int] = {}
    huddle_id_to_recipient_id: Dict[int, int] = {}
    user_id_to_recipient_id: Dict[int, int] = {}

    map_receiver_id_to_recipient_id(
        zerver_recipient=zerver_recipient,
        stream_id_to_recipient_id=stream_id_to_recipient_id,
        huddle_id_to_recipient_id=huddle_id_to_recipient_id,
        user_id_to_recipient_id=user_id_to_recipient_id,
    )

    channel_messages: List[Dict[str, Any]] = []
    private_messages: List[Dict[str, Any]] = []

    separate_channel_and_private_messages(
        messages=rocketchat_data["message"],
        direct_id_to_direct_map=direct_id_to_direct_map,
        huddle_id_to_huddle_map=huddle_id_to_huddle_map,
        channel_messages=channel_messages,
        private_messages=private_messages,
    )

    # Output accumulators mutated by the two process_messages calls.
    total_reactions: List[ZerverFieldsT] = []
    uploads_list: List[ZerverFieldsT] = []
    zerver_attachment: List[ZerverFieldsT] = []

    upload_id_to_upload_data_map = map_upload_id_to_upload_data(
        rocketchat_data["upload"])

    # Process channel messages
    process_messages(
        realm_id=realm_id,
        messages=channel_messages,
        subscriber_map=subscriber_map,
        is_pm_data=False,
        username_to_user_id_map=username_to_user_id_map,
        user_id_mapper=user_id_mapper,
        user_handler=user_handler,
        user_id_to_recipient_id=user_id_to_recipient_id,
        stream_id_mapper=stream_id_mapper,
        stream_id_to_recipient_id=stream_id_to_recipient_id,
        huddle_id_mapper=huddle_id_mapper,
        huddle_id_to_recipient_id=huddle_id_to_recipient_id,
        room_id_to_room_map=room_id_to_room_map,
        dsc_id_to_dsc_map=dsc_id_to_dsc_map,
        direct_id_to_direct_map=direct_id_to_direct_map,
        huddle_id_to_huddle_map=huddle_id_to_huddle_map,
        zerver_realmemoji=zerver_realmemoji,
        total_reactions=total_reactions,
        uploads_list=uploads_list,
        zerver_attachment=zerver_attachment,
        upload_id_to_upload_data_map=upload_id_to_upload_data_map,
        output_dir=output_dir,
    )
    # Process private messages
    process_messages(
        realm_id=realm_id,
        messages=private_messages,
        subscriber_map=subscriber_map,
        is_pm_data=True,
        username_to_user_id_map=username_to_user_id_map,
        user_id_mapper=user_id_mapper,
        user_handler=user_handler,
        user_id_to_recipient_id=user_id_to_recipient_id,
        stream_id_mapper=stream_id_mapper,
        stream_id_to_recipient_id=stream_id_to_recipient_id,
        huddle_id_mapper=huddle_id_mapper,
        huddle_id_to_recipient_id=huddle_id_to_recipient_id,
        room_id_to_room_map=room_id_to_room_map,
        dsc_id_to_dsc_map=dsc_id_to_dsc_map,
        direct_id_to_direct_map=direct_id_to_direct_map,
        huddle_id_to_huddle_map=huddle_id_to_huddle_map,
        zerver_realmemoji=zerver_realmemoji,
        total_reactions=total_reactions,
        uploads_list=uploads_list,
        zerver_attachment=zerver_attachment,
        upload_id_to_upload_data_map=upload_id_to_upload_data_map,
        output_dir=output_dir,
    )
    realm["zerver_reaction"] = total_reactions
    # Users are written last, since message processing may add users.
    realm["zerver_userprofile"] = user_handler.get_all_users()
    realm["sort_by_date"] = True

    create_converted_data_files(realm, output_dir, "/realm.json")
    # TODO: Add support for importing avatars
    create_converted_data_files([], output_dir, "/avatars/records.json")

    # Import attachments
    attachment: Dict[str, List[Any]] = {"zerver_attachment": zerver_attachment}
    create_converted_data_files(attachment, output_dir, "/attachment.json")
    create_converted_data_files(uploads_list, output_dir, "/uploads/records.json")

    logging.info("Start making tarball")
    subprocess.check_call(
        ["tar", "-czf", output_dir + ".tar.gz", output_dir, "-P"])
    logging.info("Done making tarball")
def process_message_file(fn: str,
                         get_recipient_id: Callable[[ZerverFieldsT], int],
                         message_key: str,
                         user_map: Dict[int, ZerverFieldsT],
                         zerver_subscription: List[ZerverFieldsT],
                         data_dir: str,
                         output_dir: str) -> None:
    """Convert one exported message file into a Zulip message-dump file.

    Reads the raw messages under ``message_key`` from the JSON file
    ``fn``, builds zerver_message and zerver_usermessage rows, and
    writes them to a numbered messages-NNNNNN.json file in
    ``output_dir``.

    Fixes vs. prior version: the JSON file is now opened with a context
    manager (the old ``json.load(open(fn))`` leaked the file handle),
    and the local that shadowed the ``dir`` builtin was renamed.
    """
    def fix_mentions(content: str, mention_user_ids: List[int]) -> str:
        # Rewrite @short_name mentions into Zulip's @**full_name** syntax.
        for user_id in mention_user_ids:
            user = user_map[user_id]
            hipchat_mention = '@{short_name}'.format(**user)
            zulip_mention = '@**{full_name}**'.format(**user)
            content = content.replace(hipchat_mention, zulip_mention)

        content = content.replace('@here', '@**all**')
        return content

    def get_raw_messages(fn: str) -> List[ZerverFieldsT]:
        # The directory containing the file is named after the room/user
        # id this dump belongs to — TODO confirm against the exporter.
        parent_dir = os.path.dirname(fn)
        fn_id = int(os.path.basename(parent_dir))
        with open(fn) as f:
            data = json.load(f)
        flat_data = [
            d[message_key]
            for d in data
            if message_key in d
        ]
        return [
            dict(
                fn_id=fn_id,
                sender_id=d['sender']['id'],
                receiver_id=d.get('receiver', {}).get('id'),
                content=d['message'],
                mention_user_ids=d['mentions'],
                pub_date=str_date_to_float(d['timestamp']),
            )
            for d in flat_data
        ]

    raw_messages = get_raw_messages(fn)

    mention_map = dict()  # type: Dict[int, Set[int]]

    def make_message(message_id: int, raw_message: ZerverFieldsT) -> ZerverFieldsT:
        # One side effect here: record the mentioned users for the
        # usermessage pass below.
        mention_map[message_id] = set(raw_message['mention_user_ids'])

        content = fix_mentions(
            content=raw_message['content'],
            mention_user_ids=raw_message['mention_user_ids'],
        )
        pub_date = raw_message['pub_date']
        recipient_id = get_recipient_id(raw_message)
        rendered_content = None
        subject = 'archived'
        user_id = raw_message['sender_id']

        return build_message(
            content=content,
            message_id=message_id,
            pub_date=pub_date,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            subject=subject,
            user_id=user_id,
        )

    zerver_message = [
        make_message(
            message_id=NEXT_ID('message'),
            raw_message=raw_message,
        )
        for raw_message in raw_messages
    ]

    # recipient_id -> set of subscribed user ids
    subscriber_map = dict()  # type: Dict[int, Set[int]]
    for sub in zerver_subscription:
        user_id = sub['user_profile']
        recipient_id = sub['recipient']
        if recipient_id not in subscriber_map:
            subscriber_map[recipient_id] = set()
        subscriber_map[recipient_id].add(user_id)

    # Fan out each message to every subscriber of its recipient.
    zerver_usermessage = []
    for message in zerver_message:
        message_id = message['id']
        recipient_id = message['recipient']
        mention_user_ids = mention_map[message_id]
        user_ids = subscriber_map.get(recipient_id, set())
        for user_id in user_ids:
            is_mentioned = user_id in mention_user_ids
            user_message = build_user_message(
                id=NEXT_ID('user_message'),
                user_id=user_id,
                message_id=message_id,
                is_mentioned=is_mentioned,
            )
            zerver_usermessage.append(user_message)

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    dump_file_id = NEXT_ID('dump_file_id')
    message_file = "/messages-%06d.json" % (dump_file_id,)
    create_converted_data_files(message_json, output_dir, message_file)
def process_raw_message_batch(realm_id: int,
                              raw_messages: List[Dict[str, Any]],
                              subscriber_map: Dict[int, Set[int]],
                              user_id_mapper: IdMapper,
                              user_handler: UserHandler,
                              get_recipient_id_from_receiver_name: Callable[[str, int], int],
                              is_pm_data: bool,
                              output_dir: str,
                              zerver_realmemoji: List[Dict[str, Any]],
                              total_reactions: List[Dict[str, Any]],
                              ) -> None:
    """Convert one batch of raw Mattermost messages to a Zulip dump file.

    Builds zerver_message/zerver_usermessage rows for the batch,
    appends reactions to ``total_reactions``, and writes a numbered
    messages-NNNNNN.json file into ``output_dir``.
    """
    def fix_mentions(content: str, mention_user_ids: Set[int]) -> str:
        # Rewrite @short_name into Zulip's @**full_name** syntax.
        for user_id in mention_user_ids:
            user = user_handler.get_user(user_id=user_id)
            mattermost_mention = '@{short_name}'.format(**user)
            zulip_mention = '@**{full_name}**'.format(**user)
            content = content.replace(mattermost_mention, zulip_mention)

        content = content.replace('@channel', '@**all**')
        content = content.replace('@all', '@**all**')
        # We don't have an equivalent for Mattermost's @here mention which mentions all users
        # online in the channel.
        content = content.replace('@here', '@**all**')
        return content

    mention_map: Dict[int, Set[int]] = dict()
    zerver_message = []

    # Imported lazily, presumably to avoid a hard dependency at module
    # load time — TODO confirm.
    import html2text
    h = html2text.HTML2Text()

    pm_members = {}

    for raw_message in raw_messages:
        message_id = NEXT_ID('message')
        mention_user_ids = get_mentioned_user_ids(raw_message, user_id_mapper)
        mention_map[message_id] = mention_user_ids

        content = fix_mentions(
            content=raw_message['content'],
            mention_user_ids=mention_user_ids,
        )
        content = h.handle(content)

        # Skip pathologically long messages rather than failing the import.
        if len(content) > 10000:  # nocoverage
            logging.info('skipping too-long message of length %s' % (len(content),))
            continue

        date_sent = raw_message['date_sent']
        sender_user_id = raw_message['sender_id']
        # Exactly one of these keys identifies the conversation type.
        if "channel_name" in raw_message:
            recipient_id = get_recipient_id_from_receiver_name(raw_message["channel_name"], Recipient.STREAM)
        elif "huddle_name" in raw_message:
            recipient_id = get_recipient_id_from_receiver_name(raw_message["huddle_name"], Recipient.HUDDLE)
        elif "pm_members" in raw_message:
            members = raw_message["pm_members"]
            member_ids = {user_id_mapper.get(member) for member in members}
            pm_members[message_id] = member_ids
            # The recipient is whichever of the two members is not the sender.
            if sender_user_id == user_id_mapper.get(members[0]):
                recipient_id = get_recipient_id_from_receiver_name(members[1], Recipient.PERSONAL)
            else:
                recipient_id = get_recipient_id_from_receiver_name(members[0], Recipient.PERSONAL)
        else:
            raise AssertionError("raw_message without channel_name, huddle_name or pm_members key")

        rendered_content = None

        topic_name = 'imported from mattermost'

        message = build_message(
            content=content,
            message_id=message_id,
            date_sent=date_sent,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            topic_name=topic_name,
            user_id=sender_user_id,
            has_attachment=False,
        )
        zerver_message.append(message)
        build_reactions(realm_id, total_reactions, raw_message["reactions"], message_id,
                        user_id_mapper, zerver_realmemoji)

    zerver_usermessage = make_user_messages(
        zerver_message=zerver_message,
        subscriber_map=subscriber_map,
        is_pm_data=is_pm_data,
        mention_map=mention_map,
    )

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    # Keying NEXT_ID on realm_id keeps dump-file numbering independent
    # per realm.
    dump_file_id = NEXT_ID('dump_file_id' + str(realm_id))
    message_file = "/messages-%06d.json" % (dump_file_id,)
    create_converted_data_files(message_json, output_dir, message_file)
def write_emoticon_data(
    realm_id: int, custom_emoji_data: List[Dict[str, Any]], data_dir: str, output_dir: str
) -> List[ZerverFieldsT]:
    """
    This function does most of the work for processing emoticons, the bulk
    of which is copying files.  We also write a json file with metadata.
    Finally, we return a list of RealmEmoji dicts to our caller.

    In our data_dir we have a pretty simple setup:

        The exported JSON file will have emoji rows if it contains any custom emoji
            {
                "type": "emoji",
                "emoji": {"name": "peerdium", "image": "exported_emoji/h15ni7kf1bnj7jeua4qhmctsdo/image"}
            }
            {
                "type": "emoji",
                "emoji": {"name": "tick", "image": "exported_emoji/7u7x8ytgp78q8jir81o9ejwwnr/image"}
            }

        exported_emoji/ - contains a bunch of image files:
            exported_emoji/7u7x8ytgp78q8jir81o9ejwwnr/image
            exported_emoji/h15ni7kf1bnj7jeua4qhmctsdo/image

    We move all the relevant files to Zulip's more nested
    directory structure.
    """
    logging.info("Starting to process emoticons")

    # Normalize the raw rows down to just (path, name).
    flat_data = [
        dict(
            path=d["image"],
            name=d["name"],
        )
        for d in custom_emoji_data
    ]

    emoji_folder = os.path.join(output_dir, "emoji")
    os.makedirs(emoji_folder, exist_ok=True)

    def process(data: ZerverFieldsT) -> ZerverFieldsT:
        # Copy one emoji image into Zulip's nested layout and return its
        # record for emoji/records.json.
        source_sub_path = data["path"]
        source_path = os.path.join(data_dir, source_sub_path)

        # The emoji name doubles as the target file name.
        target_fn = data["name"]
        target_sub_path = RealmEmoji.PATH_ID_TEMPLATE.format(
            realm_id=realm_id,
            emoji_file_name=target_fn,
        )
        target_path = os.path.join(emoji_folder, target_sub_path)

        os.makedirs(os.path.dirname(target_path), exist_ok=True)

        source_path = os.path.abspath(source_path)
        target_path = os.path.abspath(target_path)

        shutil.copyfile(source_path, target_path)

        return dict(
            path=target_path,
            s3_path=target_path,
            file_name=target_fn,
            realm_id=realm_id,
            name=data["name"],
        )

    emoji_records = list(map(process, flat_data))
    create_converted_data_files(emoji_records, output_dir, "/emoji/records.json")

    realmemoji = [
        build_realm_emoji(
            realm_id=realm_id,
            name=rec["name"],
            id=NEXT_ID("realmemoji"),
            file_name=rec["file_name"],
        )
        for rec in emoji_records
    ]
    logging.info("Done processing emoticons")

    return realmemoji
def process_raw_message_batch(
    realm_id: int,
    raw_messages: List[Dict[str, Any]],
    subscriber_map: Dict[int, Set[int]],
    user_id_mapper: IdMapper,
    user_handler: UserHandler,
    get_recipient_id: Callable[[ZerverFieldsT], int],
    is_pm_data: bool,
    output_dir: str,
    zerver_realmemoji: List[Dict[str, Any]],
    total_reactions: List[Dict[str, Any]],
) -> None:
    """Convert one batch of raw Mattermost messages to a Zulip dump file.

    Builds zerver_message/zerver_usermessage rows, accumulates reactions
    into ``total_reactions``, and writes a numbered messages-NNNNNN.json
    file into ``output_dir``.
    """
    def fix_mentions(content: str, mention_user_ids: Set[int]) -> str:
        # Rewrite @short_name into Zulip's @**full_name** syntax.
        for user_id in mention_user_ids:
            user = user_handler.get_user(user_id=user_id)
            mattermost_mention = '@{short_name}'.format(**user)
            zulip_mention = '@**{full_name}**'.format(**user)
            content = content.replace(mattermost_mention, zulip_mention)

        content = content.replace('@channel', '@**all**')
        content = content.replace('@all', '@**all**')
        # We don't have an equivalent for Mattermost's @here mention which mentions all users
        # online in the channel.
        content = content.replace('@here', '@**all**')
        return content

    mention_map = dict()  # type: Dict[int, Set[int]]
    zerver_message = []

    # Imported lazily, presumably to avoid a hard dependency at module
    # load time — TODO confirm.
    import html2text
    h = html2text.HTML2Text()

    name_to_codepoint = get_name_to_codepoint_dict()
    for raw_message in raw_messages:
        message_id = NEXT_ID('message')
        mention_user_ids = get_mentioned_user_ids(raw_message, user_id_mapper)
        mention_map[message_id] = mention_user_ids

        content = fix_mentions(
            content=raw_message['content'],
            mention_user_ids=mention_user_ids,
        )
        content = h.handle(content)

        # Skip pathologically long messages rather than failing the import.
        if len(content) > 10000:  # nocoverage
            logging.info('skipping too-long message of length %s' % (len(content), ))
            continue

        pub_date = raw_message['pub_date']
        try:
            recipient_id = get_recipient_id(raw_message)
        except KeyError:
            logging.debug(
                "Could not find recipient_id for a message, skipping.")
            continue

        rendered_content = None

        topic_name = 'imported from mattermost'
        user_id = raw_message['sender_id']

        message = build_message(
            content=content,
            message_id=message_id,
            pub_date=pub_date,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            topic_name=topic_name,
            user_id=user_id,
            has_attachment=False,
        )
        zerver_message.append(message)
        build_reactions(realm_id, total_reactions, raw_message["reactions"], message_id,
                        name_to_codepoint, user_id_mapper, zerver_realmemoji)

    zerver_usermessage = make_user_messages(
        zerver_message=zerver_message,
        subscriber_map=subscriber_map,
        is_pm_data=is_pm_data,
        mention_map=mention_map,
    )

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    # Keying NEXT_ID on realm_id keeps dump-file numbering independent
    # per realm.
    dump_file_id = NEXT_ID('dump_file_id' + str(realm_id))
    message_file = "/messages-%06d.json" % (dump_file_id, )
    create_converted_data_files(message_json, output_dir, message_file)
def write_emoticon_data(realm_id: int,
                        data_dir: str,
                        output_dir: str) -> List[ZerverFieldsT]:
    '''
    This function does most of the work for processing emoticons, the bulk
    of which is copying files.  We also write a json file with metadata.
    Finally, we return a list of RealmEmoji dicts to our caller.

    In our data_dir we have a pretty simple setup:

        emoticons.json - has very simple metadata on emojis:

          {
            "Emoticon": {
              "id": 9875487,
              "path": "emoticons/yasss.jpg",
              "shortcut": "yasss"
            }
          },
          {
            "Emoticon": {
              "id": 718017,
              "path": "emoticons/yayyyyy.gif",
              "shortcut": "yayyyyy"
            }
          }

        emoticons/ - contains a bunch of image files:

            slytherinsnake.gif
            spanishinquisition.jpg
            sparkle.png
            spiderman.gif
            stableparrot.gif
            stalkerparrot.gif
            supergirl.png
            superman.png

    We move all the relevant files to Zulip's more nested
    directory structure.
    '''
    logging.info('Starting to process emoticons')

    fn = 'emoticons.json'
    data_file = os.path.join(data_dir, fn)
    if not os.path.exists(data_file):
        # Custom emoji are optional in the export; warn and continue.
        logging.warning("HipChat export does not contain emoticons.json.")
        logging.warning("As a result, custom emoji cannot be imported.")
        return []

    with open(data_file) as f:
        data = ujson.load(f)

    if isinstance(data, dict) and 'Emoticons' in data:
        # Handle the hc-migrate export format for emoticons.json.
        flat_data = [
            dict(
                path=d['path'],
                name=d['shortcut'],
            )
            for d in data['Emoticons']
        ]
    else:
        # Standard export format: a list of {"Emoticon": {...}} rows.
        flat_data = [
            dict(
                path=d['Emoticon']['path'],
                name=d['Emoticon']['shortcut'],
            )
            for d in data
        ]

    emoji_folder = os.path.join(output_dir, 'emoji')
    os.makedirs(emoji_folder, exist_ok=True)

    def process(data: ZerverFieldsT) -> ZerverFieldsT:
        # Copy one emoji image into Zulip's nested layout and return its
        # record for emoji/records.json.
        source_sub_path = data['path']
        source_fn = os.path.basename(source_sub_path)
        source_path = os.path.join(data_dir, source_sub_path)

        # Use our template from RealmEmoji
        # PATH_ID_TEMPLATE = "{realm_id}/emoji/images/{emoji_file_name}"
        target_fn = source_fn
        target_sub_path = RealmEmoji.PATH_ID_TEMPLATE.format(
            realm_id=realm_id,
            emoji_file_name=target_fn,
        )
        target_path = os.path.join(emoji_folder, target_sub_path)

        os.makedirs(os.path.dirname(target_path), exist_ok=True)

        source_path = os.path.abspath(source_path)
        target_path = os.path.abspath(target_path)

        shutil.copyfile(source_path, target_path)

        return dict(
            path=target_path,
            s3_path=target_path,
            file_name=target_fn,
            realm_id=realm_id,
            name=data['name'],
        )

    emoji_records = list(map(process, flat_data))
    create_converted_data_files(emoji_records, output_dir, '/emoji/records.json')

    realmemoji = [
        build_realm_emoji(
            realm_id=realm_id,
            name=rec['name'],
            id=NEXT_ID('realmemoji'),
            file_name=rec['file_name'],
        )
        for rec in emoji_records
    ]
    logging.info('Done processing emoticons')

    return realmemoji
def process_raw_message_batch(
    realm_id: int,
    raw_messages: List[Dict[str, Any]],
    subscriber_map: Dict[int, Set[int]],
    user_id_mapper: IdMapper,
    user_handler: UserHandler,
    get_recipient_id_from_receiver_name: Callable[[str, int], int],
    is_pm_data: bool,
    output_dir: str,
    zerver_realmemoji: List[Dict[str, Any]],
    total_reactions: List[Dict[str, Any]],
    uploads_list: List[ZerverFieldsT],
    zerver_attachment: List[ZerverFieldsT],
    mattermost_data_dir: str,
) -> None:
    """Convert one batch of raw Mattermost messages to a Zulip dump file.

    Builds zerver_message/zerver_usermessage rows, processes message
    attachments (appending to ``uploads_list`` and ``zerver_attachment``),
    accumulates reactions into ``total_reactions``, and writes a numbered
    messages-NNNNNN.json file into ``output_dir``.
    """
    def fix_mentions(content: str, mention_user_ids: Set[int]) -> str:
        # Rewrite @short_name into Zulip's @**full_name** syntax.
        for user_id in mention_user_ids:
            user = user_handler.get_user(user_id=user_id)
            mattermost_mention = "@{short_name}".format(**user)
            zulip_mention = "@**{full_name}**".format(**user)
            content = content.replace(mattermost_mention, zulip_mention)

        content = content.replace("@channel", "@**all**")
        content = content.replace("@all", "@**all**")
        # We don't have an equivalent for Mattermost's @here mention which mentions all users
        # online in the channel.
        content = content.replace("@here", "@**all**")
        return content

    mention_map: Dict[int, Set[int]] = {}
    zerver_message = []

    # Imported lazily, presumably to avoid a hard dependency at module
    # load time — TODO confirm.
    import html2text

    h = html2text.HTML2Text()

    pm_members = {}

    for raw_message in raw_messages:
        message_id = NEXT_ID("message")
        mention_user_ids = get_mentioned_user_ids(raw_message, user_id_mapper)
        mention_map[message_id] = mention_user_ids

        content = fix_mentions(
            content=raw_message["content"],
            mention_user_ids=mention_user_ids,
        )
        content = h.handle(content)

        # Skip pathologically long messages rather than failing the import.
        if len(content) > 10000:  # nocoverage
            logging.info("skipping too-long message of length %s", len(content))
            continue

        date_sent = raw_message["date_sent"]
        sender_user_id = raw_message["sender_id"]
        # Exactly one of these keys identifies the conversation type.
        if "channel_name" in raw_message:
            recipient_id = get_recipient_id_from_receiver_name(
                raw_message["channel_name"], Recipient.STREAM
            )
        elif "huddle_name" in raw_message:
            recipient_id = get_recipient_id_from_receiver_name(
                raw_message["huddle_name"], Recipient.HUDDLE
            )
        elif "pm_members" in raw_message:
            members = raw_message["pm_members"]
            member_ids = {user_id_mapper.get(member) for member in members}
            pm_members[message_id] = member_ids
            # The recipient is whichever of the two members is not the sender.
            if sender_user_id == user_id_mapper.get(members[0]):
                recipient_id = get_recipient_id_from_receiver_name(members[1], Recipient.PERSONAL)
            else:
                recipient_id = get_recipient_id_from_receiver_name(members[0], Recipient.PERSONAL)
        else:
            raise AssertionError("raw_message without channel_name, huddle_name or pm_members key")

        rendered_content = None

        has_attachment = False
        has_image = False
        has_link = False
        if "attachments" in raw_message:
            has_attachment = True
            has_link = True

            # Copies the uploaded files into the export and returns
            # markdown to append to the message body.
            attachment_markdown, has_image = process_message_attachments(
                attachments=raw_message["attachments"],
                realm_id=realm_id,
                message_id=message_id,
                user_id=sender_user_id,
                user_handler=user_handler,
                zerver_attachment=zerver_attachment,
                uploads_list=uploads_list,
                mattermost_data_dir=mattermost_data_dir,
                output_dir=output_dir,
            )

            content += attachment_markdown

        topic_name = "imported from mattermost"

        message = build_message(
            content=content,
            message_id=message_id,
            date_sent=date_sent,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            topic_name=topic_name,
            user_id=sender_user_id,
            has_image=has_image,
            has_link=has_link,
            has_attachment=has_attachment,
        )
        zerver_message.append(message)
        build_reactions(
            realm_id,
            total_reactions,
            raw_message["reactions"],
            message_id,
            user_id_mapper,
            zerver_realmemoji,
        )

    zerver_usermessage = make_user_messages(
        zerver_message=zerver_message,
        subscriber_map=subscriber_map,
        is_pm_data=is_pm_data,
        mention_map=mention_map,
    )

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    # Keying NEXT_ID on realm_id keeps dump-file numbering independent
    # per realm.
    dump_file_id = NEXT_ID("dump_file_id" + str(realm_id))
    message_file = f"/messages-{dump_file_id:06}.json"
    create_converted_data_files(message_json, output_dir, message_file)
def do_convert_data(input_tar_file: str, output_dir: str) -> None: input_data_dir = untar_input_file(input_tar_file) realm_id = 0 realm = make_realm(realm_id=realm_id) # users.json -> UserProfile raw_user_data = read_user_data(data_dir=input_data_dir) zerver_userprofile = convert_user_data( raw_data=raw_user_data, realm_id=realm_id, ) realm['zerver_userprofile'] = zerver_userprofile # streams.json -> Stream raw_stream_data = read_room_data(data_dir=input_data_dir) zerver_stream = convert_room_data( raw_data=raw_stream_data, realm_id=realm_id, ) realm['zerver_stream'] = zerver_stream zerver_recipient = build_recipients( zerver_userprofile=zerver_userprofile, zerver_stream=zerver_stream, ) realm['zerver_recipient'] = zerver_recipient zerver_subscription = build_subscriptions( zerver_userprofile=zerver_userprofile, zerver_recipient=zerver_recipient, zerver_stream=zerver_stream, ) realm['zerver_subscription'] = zerver_subscription zerver_realmemoji = write_emoticon_data( realm_id=realm_id, data_dir=input_data_dir, output_dir=output_dir, ) realm['zerver_realmemoji'] = zerver_realmemoji create_converted_data_files(realm, output_dir, '/realm.json') for message_key in ['UserMessage', 'PrivateUserMessage']: write_message_data( realm_id=realm_id, message_key=message_key, zerver_recipient=zerver_recipient, zerver_subscription=zerver_subscription, zerver_userprofile=zerver_userprofile, data_dir=input_data_dir, output_dir=output_dir, ) write_avatar_data( raw_user_data=raw_user_data, output_dir=output_dir, realm_id=realm_id, ) write_upload_data( output_dir=output_dir, realm_id=realm_id, ) subprocess.check_call(["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P'])
def do_convert_data(mattermost_data_dir: str, output_dir: str, masking_content: bool) -> None:
    """Convert a Mattermost export into Zulip's import format.

    Mattermost exports may contain multiple teams; each team becomes its
    own Zulip realm, written to ``output_dir/<team_name>`` and packed
    into its own tarball.  ``masking_content`` is forwarded to
    write_message_data (presumably to scrub message text — confirm
    against that helper).
    """
    username_to_user: Dict[str, Dict[str, Any]] = {}

    os.makedirs(output_dir, exist_ok=True)
    if os.listdir(output_dir):  # nocoverage
        raise Exception("Output directory should be empty!")

    mattermost_data_file = os.path.join(mattermost_data_dir, "export.json")
    mattermost_data = mattermost_data_file_to_dict(mattermost_data_file)

    username_to_user = create_username_to_user_mapping(mattermost_data["user"])

    for team in mattermost_data["team"]:
        realm_id = NEXT_ID("realm_id")
        team_name = team["name"]

        # Fresh handlers/mappers per team, since each team is converted
        # into an independent realm.
        user_handler = UserHandler()
        subscriber_handler = SubscriberHandler()
        user_id_mapper = IdMapper()
        stream_id_mapper = IdMapper()
        huddle_id_mapper = IdMapper()

        print("Generating data for", team_name)
        realm = make_realm(realm_id, team)
        realm_output_dir = os.path.join(output_dir, team_name)

        reset_mirror_dummy_users(username_to_user)

        label_mirror_dummy_users(
            len(mattermost_data["team"]), team_name, mattermost_data, username_to_user
        )

        convert_user_data(
            user_handler=user_handler,
            user_id_mapper=user_id_mapper,
            user_data_map=username_to_user,
            realm_id=realm_id,
            team_name=team_name,
        )

        zerver_stream = convert_channel_data(
            channel_data=mattermost_data["channel"],
            user_data_map=username_to_user,
            subscriber_handler=subscriber_handler,
            stream_id_mapper=stream_id_mapper,
            user_id_mapper=user_id_mapper,
            realm_id=realm_id,
            team_name=team_name,
        )
        realm["zerver_stream"] = zerver_stream

        # Direct channels (huddles) are only convertible for
        # single-team exports.
        zerver_huddle: List[ZerverFieldsT] = []
        if len(mattermost_data["team"]) == 1:
            zerver_huddle = convert_huddle_data(
                huddle_data=mattermost_data["direct_channel"],
                user_data_map=username_to_user,
                subscriber_handler=subscriber_handler,
                huddle_id_mapper=huddle_id_mapper,
                user_id_mapper=user_id_mapper,
                realm_id=realm_id,
                team_name=team_name,
            )
        realm["zerver_huddle"] = zerver_huddle

        all_users = user_handler.get_all_users()

        zerver_recipient = build_recipients(
            zerver_userprofile=all_users,
            zerver_stream=zerver_stream,
            zerver_huddle=zerver_huddle,
        )
        realm["zerver_recipient"] = zerver_recipient

        stream_subscriptions = build_stream_subscriptions(
            get_users=subscriber_handler.get_users,
            zerver_recipient=zerver_recipient,
            zerver_stream=zerver_stream,
        )

        huddle_subscriptions = build_huddle_subscriptions(
            get_users=subscriber_handler.get_users,
            zerver_recipient=zerver_recipient,
            zerver_huddle=zerver_huddle,
        )

        personal_subscriptions = build_personal_subscriptions(
            zerver_recipient=zerver_recipient,
        )

        # Mattermost currently supports only exporting messages from channels.
        # Personal messages and huddles are not exported.
        zerver_subscription = personal_subscriptions + stream_subscriptions + huddle_subscriptions
        realm["zerver_subscription"] = zerver_subscription

        zerver_realmemoji = write_emoticon_data(
            realm_id=realm_id,
            custom_emoji_data=mattermost_data["emoji"],
            data_dir=mattermost_data_dir,
            output_dir=realm_output_dir,
        )
        realm["zerver_realmemoji"] = zerver_realmemoji

        subscriber_map = make_subscriber_map(
            zerver_subscription=zerver_subscription,
        )

        # Output accumulators mutated by write_message_data.
        total_reactions: List[Dict[str, Any]] = []
        uploads_list: List[ZerverFieldsT] = []
        zerver_attachment: List[ZerverFieldsT] = []

        write_message_data(
            num_teams=len(mattermost_data["team"]),
            team_name=team_name,
            realm_id=realm_id,
            post_data=mattermost_data["post"],
            zerver_recipient=zerver_recipient,
            subscriber_map=subscriber_map,
            output_dir=realm_output_dir,
            masking_content=masking_content,
            stream_id_mapper=stream_id_mapper,
            huddle_id_mapper=huddle_id_mapper,
            user_id_mapper=user_id_mapper,
            user_handler=user_handler,
            zerver_realmemoji=zerver_realmemoji,
            total_reactions=total_reactions,
            uploads_list=uploads_list,
            zerver_attachment=zerver_attachment,
            mattermost_data_dir=mattermost_data_dir,
        )
        realm["zerver_reaction"] = total_reactions
        # Users are written last, since message processing may add
        # mirror users.
        realm["zerver_userprofile"] = user_handler.get_all_users()
        realm["sort_by_date"] = True

        create_converted_data_files(realm, realm_output_dir, "/realm.json")
        # Mattermost currently doesn't support exporting avatars
        create_converted_data_files([], realm_output_dir, "/avatars/records.json")

        # Export message attachments
        attachment: Dict[str, List[Any]] = {"zerver_attachment": zerver_attachment}
        create_converted_data_files(uploads_list, realm_output_dir, "/uploads/records.json")
        create_converted_data_files(attachment, realm_output_dir, "/attachment.json")

        logging.info("Start making tarball")
        # '-P' preserves absolute paths inside the archive.
        subprocess.check_call(["tar", "-czf", realm_output_dir + ".tar.gz", realm_output_dir, "-P"])
        logging.info("Done making tarball")
def write_emoticon_data(realm_id: int,
                        data_dir: str,
                        output_dir: str) -> List[ZerverFieldsT]:
    '''
    This function does most of the work for processing emoticons, the bulk
    of which is copying files.  We also write a json file with metadata.
    Finally, we return a list of RealmEmoji dicts to our caller.

    In our data_dir we have a pretty simple setup:

        emoticons.json - has very simple metadata on emojis:

          {
            "Emoticon": {
              "id": 9875487,
              "path": "emoticons/yasss.jpg",
              "shortcut": "yasss"
            }
          },
          {
            "Emoticon": {
              "id": 718017,
              "path": "emoticons/yayyyyy.gif",
              "shortcut": "yayyyyy"
            }
          }

        emoticons/ - contains a bunch of image files:

            slytherinsnake.gif
            spanishinquisition.jpg
            sparkle.png
            spiderman.gif
            stableparrot.gif
            stalkerparrot.gif
            supergirl.png
            superman.png

    We move all the relevant files to Zulip's more nested
    directory structure.

    Fix vs. prior version: the metadata file is opened with a context
    manager (the old ``json.load(open(data_file))`` leaked the handle).
    '''
    logging.info('Starting to process emoticons')

    fn = 'emoticons.json'
    data_file = os.path.join(data_dir, fn)
    with open(data_file) as f:
        data = json.load(f)

    # Normalize the raw rows down to just (path, name).
    flat_data = [
        dict(
            path=d['Emoticon']['path'],
            name=d['Emoticon']['shortcut'],
        )
        for d in data
    ]

    emoji_folder = os.path.join(output_dir, 'emoji')
    os.makedirs(emoji_folder, exist_ok=True)

    def process(data: ZerverFieldsT) -> ZerverFieldsT:
        # Copy one emoji image into Zulip's nested layout and return its
        # record for emoji/records.json.
        source_sub_path = data['path']
        source_fn = os.path.basename(source_sub_path)
        source_path = os.path.join(data_dir, source_sub_path)

        # Use our template from RealmEmoji
        # PATH_ID_TEMPLATE = "{realm_id}/emoji/images/{emoji_file_name}"
        target_fn = source_fn
        target_sub_path = RealmEmoji.PATH_ID_TEMPLATE.format(
            realm_id=realm_id,
            emoji_file_name=target_fn,
        )
        target_path = os.path.join(emoji_folder, target_sub_path)

        os.makedirs(os.path.dirname(target_path), exist_ok=True)

        source_path = os.path.abspath(source_path)
        target_path = os.path.abspath(target_path)

        shutil.copyfile(source_path, target_path)

        # NOTE(review): unlike the other write_emoticon_data variants in
        # this file, this record has no 's3_path' key — confirm whether
        # the importer for this converter expects one.
        return dict(
            path=target_path,
            file_name=target_fn,
            realm_id=realm_id,
            name=data['name'],
        )

    emoji_records = list(map(process, flat_data))
    create_converted_data_files(emoji_records, output_dir, '/emoji/records.json')

    realmemoji = [
        build_realm_emoji(
            realm_id=realm_id,
            name=rec['name'],
            id=NEXT_ID('realmemoji'),
            file_name=rec['file_name'],
        )
        for rec in emoji_records
    ]
    logging.info('Done processing emoticons')

    return realmemoji
def process_raw_message_batch(realm_id: int,
                              raw_messages: List[Dict[str, Any]],
                              subscriber_map: Dict[int, Set[int]],
                              user_id_mapper: IdMapper,
                              user_handler: UserHandler,
                              attachment_handler: AttachmentHandler,
                              get_recipient_id: Callable[[ZerverFieldsT], int],
                              is_pm_data: bool,
                              output_dir: str) -> None:
    """
    Convert one batch of raw (HipChat-style) messages into Zulip
    zerver_message/zerver_usermessage rows and write them out as a single
    messages-NNNNNN.json file in output_dir.

    Side effects: consumes ids from the global NEXT_ID sequences
    ('message' and 'dump_file_id'), and lets attachment_handler record
    attachment metadata per message.  Each raw_message is expected to carry
    the keys 'content', 'mention_user_ids', 'date_sent', 'sender_id',
    'attachment' and 'files_dir' (the only keys read here).
    """
    def fix_mentions(content: str, mention_user_ids: Set[int]) -> str:
        # Rewrite @short_name mentions into Zulip's @**full_name** syntax,
        # and map the source system's @here to Zulip's @**all**.
        for user_id in mention_user_ids:
            user = user_handler.get_user(user_id=user_id)
            hipchat_mention = '@{short_name}'.format(**user)
            zulip_mention = '@**{full_name}**'.format(**user)
            content = content.replace(hipchat_mention, zulip_mention)
        content = content.replace('@here', '@**all**')
        return content

    # message_id -> set of mentioned Zulip user ids, consumed later by
    # make_user_messages to set mention flags.
    mention_map: Dict[int, Set[int]] = dict()

    zerver_message = []

    # Third-party dependency imported lazily, only when messages are
    # actually processed.
    import html2text
    h = html2text.HTML2Text()

    for raw_message in raw_messages:
        # One side effect here: this consumes an id from the global
        # 'message' sequence even if we end up skipping the message below.
        message_id = NEXT_ID('message')

        # Only keep mentions of users we actually imported.
        mention_user_ids = {
            user_id_mapper.get(id)
            for id in set(raw_message['mention_user_ids'])
            if user_id_mapper.has(id)
        }
        mention_map[message_id] = mention_user_ids

        content = fix_mentions(
            content=raw_message['content'],
            mention_user_ids=mention_user_ids,
        )
        # Source content is HTML; convert it to markdown-ish text.
        content = h.handle(content)

        if len(content) > 10000:
            logging.info('skipping too-long message of length %s' % (len(content),))
            continue

        date_sent = raw_message['date_sent']

        try:
            recipient_id = get_recipient_id(raw_message)
        except KeyError:
            # Recipient (room/user) was not imported; drop the message.
            logging.debug("Could not find recipient_id for a message, skipping.")
            continue

        rendered_content = None

        if is_pm_data:
            topic_name = ''
        else:
            topic_name = 'imported from hipchat'
        user_id = raw_message['sender_id']

        # Another side effect: attachment_handler records metadata for this
        # message/sender and returns extra markdown to append, if any.
        extra_content = attachment_handler.handle_message_data(
            realm_id=realm_id,
            message_id=message_id,
            sender_id=user_id,
            attachment=raw_message['attachment'],
            files_dir=raw_message['files_dir'],
        )

        if extra_content:
            has_attachment = True
            content += '\n' + extra_content
        else:
            has_attachment = False

        message = build_message(
            content=content,
            message_id=message_id,
            date_sent=date_sent,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            topic_name=topic_name,
            user_id=user_id,
            has_attachment=has_attachment,
        )
        zerver_message.append(message)

    zerver_usermessage = make_user_messages(
        zerver_message=zerver_message,
        subscriber_map=subscriber_map,
        is_pm_data=is_pm_data,
        mention_map=mention_map,
    )

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    dump_file_id = NEXT_ID('dump_file_id')
    message_file = "/messages-%06d.json" % (dump_file_id,)
    create_converted_data_files(message_json, output_dir, message_file)
def convert_slack_workspace_messages(
        slack_data_dir: str, users: List[ZerverFieldsT], realm_id: int,
        slack_user_id_to_zulip_user_id: SlackToZulipUserIDT,
        slack_recipient_name_to_zulip_recipient_id: SlackToZulipRecipientT,
        added_channels: AddedChannelsT, added_mpims: AddedMPIMsT,
        dm_members: DMMembersT, realm: ZerverFieldsT,
        zerver_userprofile: List[ZerverFieldsT],
        zerver_realmemoji: List[ZerverFieldsT], domain_name: str,
        output_dir: str,
        chunk_size: int = MESSAGE_BATCH_CHUNK_SIZE
) -> Tuple[List[ZerverFieldsT], List[ZerverFieldsT], List[ZerverFieldsT]]:
    """
    Stream the Slack message history, converting it to Zulip rows in
    batches of ``chunk_size`` and dumping each batch to its own
    messages-NNNNNN.json file under output_dir.

    Returns:
    1. reactions, which is a list of the reactions
    2. uploads, which is a list of uploads to be mapped in uploads records.json
    3. attachment, which is a list of the attachments
    """
    long_term_idle = process_long_term_idle_users(
        slack_data_dir, users, slack_user_id_to_zulip_user_id,
        added_channels, added_mpims, dm_members, zerver_userprofile)

    # A lazy iterator over the whole history, so memory use stays bounded
    # by the batch size rather than the full export.
    all_messages = get_messages_iterator(slack_data_dir, added_channels,
                                         added_mpims, dm_members)
    logging.info('######### IMPORTING MESSAGES STARTED #########\n')

    total_reactions = []  # type: List[ZerverFieldsT]
    total_attachments = []  # type: List[ZerverFieldsT]
    total_uploads = []  # type: List[ZerverFieldsT]

    subscriber_map = make_subscriber_map(
        zerver_subscription=realm['zerver_subscription'],
    )

    batch_number = 1
    while True:
        # Pull the next batch off the iterator; an empty batch means the
        # history has been fully consumed.
        batch = []
        for message in all_messages:
            batch.append(message)
            if len(batch) == chunk_size:
                break
        if not batch:
            break

        zerver_message, zerver_usermessage, attachment, uploads, reactions = \
            channel_message_to_zerver_message(
                realm_id, users, slack_user_id_to_zulip_user_id,
                slack_recipient_name_to_zulip_recipient_id,
                batch, zerver_realmemoji, subscriber_map, added_channels,
                dm_members, domain_name, long_term_idle)

        message_json = dict(zerver_message=zerver_message,
                            zerver_usermessage=zerver_usermessage)

        message_file = "/messages-%06d.json" % (batch_number, )
        logging.info("Writing Messages to %s\n" % (output_dir + message_file, ))
        create_converted_data_files(message_json, output_dir, message_file)

        total_reactions += reactions
        total_attachments += attachment
        total_uploads += uploads

        batch_number += 1

    logging.info('######### IMPORTING MESSAGES FINISHED #########\n')
    return total_reactions, total_uploads, total_attachments
def do_convert_data(input_tar_file: str, output_dir: str) -> None: input_data_dir = untar_input_file(input_tar_file) realm_id = 0 realm = make_realm(realm_id=realm_id) # users.json -> UserProfile raw_user_data = read_user_data(data_dir=input_data_dir) zerver_userprofile = convert_user_data( raw_data=raw_user_data, realm_id=realm_id, ) realm['zerver_userprofile'] = zerver_userprofile # streams.json -> Stream raw_stream_data = read_room_data(data_dir=input_data_dir) zerver_stream = convert_room_data( raw_data=raw_stream_data, realm_id=realm_id, ) realm['zerver_stream'] = zerver_stream zerver_recipient = build_recipients( zerver_userprofile=zerver_userprofile, zerver_stream=zerver_stream, ) realm['zerver_recipient'] = zerver_recipient zerver_subscription = build_subscriptions( zerver_userprofile=zerver_userprofile, zerver_recipient=zerver_recipient, zerver_stream=zerver_stream, ) realm['zerver_subscription'] = zerver_subscription zerver_realmemoji = write_emoticon_data( realm_id=realm_id, data_dir=input_data_dir, output_dir=output_dir, ) realm['zerver_realmemoji'] = zerver_realmemoji create_converted_data_files(realm, output_dir, '/realm.json') logging.info('Start importing message data') for message_key in ['UserMessage', 'PrivateUserMessage']: write_message_data( realm_id=realm_id, message_key=message_key, zerver_recipient=zerver_recipient, zerver_subscription=zerver_subscription, zerver_userprofile=zerver_userprofile, data_dir=input_data_dir, output_dir=output_dir, ) logging.info('Start importing avatar data') write_avatar_data( raw_user_data=raw_user_data, output_dir=output_dir, realm_id=realm_id, ) write_upload_data( output_dir=output_dir, realm_id=realm_id, ) logging.info('Start making tarball') subprocess.check_call(["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P']) logging.info('Done making tarball')
def do_convert_data(slack_zip_file: str, output_dir: str, token: str, threads: int=6) -> None:
    """
    Top-level driver for the Slack -> Zulip conversion: unzips the export,
    fetches user/emoji data from the Slack API, converts the workspace and
    its messages, downloads avatars/emoji/uploads in parallel (``threads``
    workers), writes the converted JSON files, and tars up output_dir.
    """
    # Subdomain is set by the user while running the import command
    realm_subdomain = ""
    realm_id = 0
    domain_name = settings.EXTERNAL_HOST

    # Extract the export next to the zip file (foo.zip -> foo/).
    slack_data_dir = slack_zip_file.replace('.zip', '')
    if not os.path.exists(slack_data_dir):
        os.makedirs(slack_data_dir)

    os.makedirs(output_dir, exist_ok=True)
    # output directory should be empty initially
    if os.listdir(output_dir):
        raise Exception('Output directory should be empty!')

    subprocess.check_call(['unzip', '-q', slack_zip_file, '-d', slack_data_dir])

    # We get the user data from the legacy token method of the Slack API,
    # which is deprecated, but we use it because the user email data is
    # provided only via this method.
    user_list = get_slack_api_data(token, "https://slack.com/api/users.list", "members")

    # Get custom emoji from slack api
    custom_emoji_list = get_slack_api_data(token, "https://slack.com/api/emoji.list", "emoji")

    realm, added_users, added_recipient, added_channels, avatar_list, \
        emoji_url_map = slack_workspace_to_realm(domain_name, realm_id, user_list,
                                                 realm_subdomain, slack_data_dir,
                                                 custom_emoji_list)

    # NOTE(review): this call passes 10 positional arguments; verify it
    # matches the convert_slack_workspace_messages signature in use in this
    # module (a newer variant also takes added_mpims/dm_members/
    # zerver_userprofile).
    reactions, uploads_list, zerver_attachment = convert_slack_workspace_messages(
        slack_data_dir, user_list, realm_id, added_users, added_recipient, added_channels,
        realm, realm['zerver_realmemoji'], domain_name, output_dir)

    # Move zerver_reactions to realm.json file
    realm['zerver_reaction'] = reactions

    # Download emoji images and build the emoji records.
    emoji_folder = os.path.join(output_dir, 'emoji')
    os.makedirs(emoji_folder, exist_ok=True)
    emoji_records = process_emojis(realm['zerver_realmemoji'], emoji_folder, emoji_url_map,
                                   threads)

    # Download avatars; Slack serves a 512px variant at url + '-512'.
    avatar_folder = os.path.join(output_dir, 'avatars')
    avatar_realm_folder = os.path.join(avatar_folder, str(realm_id))
    os.makedirs(avatar_realm_folder, exist_ok=True)
    avatar_records = process_avatars(avatar_list, avatar_folder, realm_id, threads,
                                     size_url_suffix='-512')

    # Download file uploads referenced by messages.
    uploads_folder = os.path.join(output_dir, 'uploads')
    os.makedirs(os.path.join(uploads_folder, str(realm_id)), exist_ok=True)
    uploads_records = process_uploads(uploads_list, uploads_folder, threads)
    attachment = {"zerver_attachment": zerver_attachment}

    # IO realm.json
    create_converted_data_files(realm, output_dir, '/realm.json')
    # IO emoji records
    create_converted_data_files(emoji_records, output_dir, '/emoji/records.json')
    # IO avatar records
    create_converted_data_files(avatar_records, output_dir, '/avatars/records.json')
    # IO uploads records
    create_converted_data_files(uploads_records, output_dir, '/uploads/records.json')
    # IO attachments records
    create_converted_data_files(attachment, output_dir, '/attachment.json')

    # remove slack dir
    rm_tree(slack_data_dir)
    # -P preserves absolute paths in the tarball.
    subprocess.check_call(["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P'])

    logging.info('######### DATA CONVERSION FINISHED #########\n')
    logging.info("Zulip data dump created at %s" % (output_dir))
def build_custom_emoji(realm_id: int, custom_emoji_data: Dict[str, List[Dict[str, Any]]], output_dir: str) -> List[ZerverFieldsT]: logging.info("Starting to process custom emoji") emoji_folder = os.path.join(output_dir, "emoji") os.makedirs(emoji_folder, exist_ok=True) zerver_realmemoji: List[ZerverFieldsT] = [] emoji_records: List[ZerverFieldsT] = [] # Map emoji file_id to emoji file data emoji_file_data = {} for emoji_file in custom_emoji_data["file"]: emoji_file_data[emoji_file["_id"]] = { "filename": emoji_file["filename"], "chunks": [] } for emoji_chunk in custom_emoji_data["chunk"]: emoji_file_data[emoji_chunk["files_id"]]["chunks"].append( emoji_chunk["data"]) # Build custom emoji for rc_emoji in custom_emoji_data["emoji"]: # Subject to change with changes in database emoji_file_id = ".".join([rc_emoji["name"], rc_emoji["extension"]]) emoji_file_info = emoji_file_data[emoji_file_id] emoji_filename = emoji_file_info["filename"] emoji_data = b"".join(emoji_file_info["chunks"]) target_sub_path = RealmEmoji.PATH_ID_TEMPLATE.format( realm_id=realm_id, emoji_file_name=emoji_filename, ) target_path = os.path.join(emoji_folder, target_sub_path) os.makedirs(os.path.dirname(target_path), exist_ok=True) with open(target_path, "wb") as e_file: e_file.write(emoji_data) emoji_aliases = [rc_emoji["name"]] emoji_aliases.extend(rc_emoji["aliases"]) for alias in emoji_aliases: emoji_record = dict( path=target_path, s3_path=target_path, file_name=emoji_filename, realm_id=realm_id, name=alias, ) emoji_records.append(emoji_record) realmemoji = build_realm_emoji( realm_id=realm_id, name=alias, id=NEXT_ID("realmemoji"), file_name=emoji_filename, ) zerver_realmemoji.append(realmemoji) create_converted_data_files(emoji_records, output_dir, "/emoji/records.json") logging.info("Done processing emoji") return zerver_realmemoji