def convert_gitter_workspace_messages(gitter_data: GitterDataT, output_dir: str,
                                      subscriber_map: Dict[int, Set[int]],
                                      user_map: Dict[str, int],
                                      stream_map: Dict[str, int],
                                      user_short_name_to_full_name: Dict[str, str],
                                      chunk_size: int=MESSAGE_BATCH_CHUNK_SIZE) -> None:
    """
    Convert Gitter messages to Zulip messages and write them out in batches.

    Messages are taken from gitter_data in slices of at most chunk_size
    entries.  Every non-empty slice is written to its own
    messages-NNNNNN.json file (numbered from 000001) inside output_dir; the
    file holds a dict with the keys 'zerver_message' and
    'zerver_usermessage'.  Message ids start at 0 and keep increasing across
    batches.

    A message carrying a 'room' key is addressed to the recipient that
    stream_map holds for that room and its topic names the room; any other
    message uses recipient id 0 and the plain topic 'imported from gitter'.

    Raises:
        ValueError: if chunk_size is not positive.
        KeyError: if a sender id is missing from user_map or a room is
            missing from stream_map.
    """
    if chunk_size <= 0:
        # A chunk size of zero makes the very first slice empty (nothing is
        # written) and a negative one slices relative to the end of the list,
        # so messages would be dropped without any error.
        raise ValueError(f'chunk_size must be a positive integer, got {chunk_size!r}')

    logging.info('######### IMPORTING MESSAGES STARTED #########\n')

    message_id = 0
    dump_file_id = 1
    low_index = 0

    while True:
        message_data = gitter_data[low_index:low_index + chunk_size]
        if not message_data:
            break

        zerver_message = []
        zerver_usermessage: List[ZerverFieldsT] = []

        for message in message_data:
            message_time = dateutil.parser.parse(message['sent']).timestamp()
            mentioned_user_ids = get_usermentions(message, user_map,
                                                  user_short_name_to_full_name)
            rendered_content = None

            has_room = 'room' in message
            topic_name = 'imported from gitter'
            if has_room:
                topic_name += f' room {message["room"]}'
            user_id = user_map[message['fromUser']['id']]
            recipient_id = stream_map[message['room']] if has_room else 0

            zulip_message = build_message(topic_name, float(message_time), message_id,
                                          message['text'], rendered_content, user_id,
                                          recipient_id)
            zerver_message.append(zulip_message)

            build_usermessages(
                zerver_usermessage=zerver_usermessage,
                subscriber_map=subscriber_map,
                recipient_id=recipient_id,
                mentioned_user_ids=mentioned_user_ids,
                message_id=message_id,
                is_private=False,
            )

            message_id += 1

        message_json = {
            'zerver_message': zerver_message,
            'zerver_usermessage': zerver_usermessage,
        }

        message_filename = os.path.join(output_dir, f"messages-{dump_file_id:06}.json")
        logging.info("Writing Messages to %s\n", message_filename)
        write_data_to_file(message_filename, message_json)

        low_index += chunk_size
        dump_file_id += 1

    logging.info('######### IMPORTING MESSAGES FINISHED #########\n')
def convert_gitter_workspace_messages(gitter_data: GitterDataT, output_dir: str,
                                      subscriber_map: Dict[int, Set[int]],
                                      user_map: Dict[str, int],
                                      user_short_name_to_full_name: Dict[str, str],
                                      chunk_size: int=MESSAGE_BATCH_CHUNK_SIZE) -> None:
    """
    Convert Gitter messages to Zulip messages and write them out in batches.

    Messages are taken from gitter_data in slices of at most chunk_size
    entries.  Every non-empty slice is written to its own
    messages-NNNNNN.json file (numbered from 000001) inside output_dir; the
    file holds a dict with the keys 'zerver_message' and
    'zerver_usermessage'.  Message ids start at 0 and keep increasing across
    batches.  Every message uses recipient id 0 and the topic
    'imported from gitter'.

    Raises:
        ValueError: if chunk_size is not positive.
        KeyError: if a sender id is missing from user_map.
    """
    if chunk_size <= 0:
        # A chunk size of zero makes the very first slice empty (nothing is
        # written) and a negative one slices relative to the end of the list,
        # so messages would be dropped without any error.
        raise ValueError('chunk_size must be a positive integer, got %r' % (chunk_size,))

    logging.info('######### IMPORTING MESSAGES STARTED #########\n')

    message_id = 0
    recipient_id = 0  # Corresponding to stream "gitter"
    topic_name = 'imported from gitter'

    dump_file_id = 1
    low_index = 0

    while True:
        message_data = gitter_data[low_index:low_index + chunk_size]
        if not message_data:
            break

        zerver_message = []
        zerver_usermessage = []  # type: List[ZerverFieldsT]

        for message in message_data:
            message_time = dateutil.parser.parse(message['sent']).timestamp()
            mentioned_user_ids = get_usermentions(message, user_map,
                                                  user_short_name_to_full_name)
            rendered_content = None
            user_id = user_map[message['fromUser']['id']]

            zulip_message = build_message(topic_name, float(message_time), message_id,
                                          message['text'], rendered_content, user_id,
                                          recipient_id)
            zerver_message.append(zulip_message)

            build_usermessages(
                zerver_usermessage=zerver_usermessage,
                subscriber_map=subscriber_map,
                recipient_id=recipient_id,
                mentioned_user_ids=mentioned_user_ids,
                message_id=message_id,
            )

            message_id += 1

        message_json = {
            'zerver_message': zerver_message,
            'zerver_usermessage': zerver_usermessage,
        }

        message_filename = os.path.join(output_dir, "messages-%06d.json" % (dump_file_id,))
        # Hand the filename to logging as an argument so the message is only
        # formatted when the record is actually emitted.
        logging.info("Writing Messages to %s\n", message_filename)
        write_data_to_file(message_filename, message_json)

        low_index += chunk_size
        dump_file_id += 1

    logging.info('######### IMPORTING MESSAGES FINISHED #########\n')
def test_build_zerver_message(self) -> None:
    """
    Check the usermessage rows build_usermessages creates for one message.

    Four of the five subscriptions below belong to recipient 2, and the test
    expects four rows plus a returned usermessage id counter of 4.
    """
    usermessages = []  # type: List[Dict[str, Any]]
    starting_usermessage_id = 0

    # (recipient, user_profile) pairs, in subscription order
    subscription_pairs = [(2, 7), (4, 12), (2, 16), (2, 15), (2, 3)]
    subscriptions = [
        dict(recipient=recipient, user_profile=user_profile)
        for recipient, user_profile in subscription_pairs
    ]

    target_recipient = 2
    mentioned = [12, 3, 16]
    msg_id = 9

    next_usermessage_id = build_usermessages(
        usermessages, starting_usermessage_id, subscriptions,
        target_recipient, mentioned, msg_id)
    self.assertEqual(next_usermessage_id, 4)

    first_row = usermessages[0]
    self.assertEqual(first_row['flags_mask'], 1)
    self.assertEqual(first_row['id'], 0)
    self.assertEqual(first_row['message'], msg_id)

    # The third subscription (user 16) is one of the mentioned users.
    second_row = usermessages[1]
    self.assertEqual(second_row['user_profile'], subscriptions[2]['user_profile'])
    self.assertEqual(second_row['flags_mask'], 9)

    last_row = usermessages[3]
    self.assertEqual(last_row['id'], 3)
    self.assertEqual(last_row['message'], msg_id)
def convert_gitter_workspace_messages(
        gitter_data: GitterDataT,
        output_dir: str,
        zerver_subscription: List[ZerverFieldsT],
        user_map: Dict[str, int],
        user_short_name_to_full_name: Dict[str, str],
        chunk_size: int = MESSAGE_BATCH_CHUNK_SIZE) -> None:
    """
    Convert Gitter messages to Zulip messages and write them out in batches.

    Messages are taken from gitter_data in slices of at most chunk_size
    entries.  Every non-empty slice is written to its own
    messages-NNNNNN.json file (numbered from 000001) inside output_dir; the
    file holds a dict with the keys 'zerver_message' and
    'zerver_usermessage'.  Message ids start at 0 and keep increasing across
    batches; the usermessage id counter is threaded through
    build_usermessages, which returns its updated value.  Every message uses
    recipient id 0 and the subject 'imported from gitter'.

    Raises:
        ValueError: if chunk_size is not positive.
        KeyError: if a sender id is missing from user_map.
    """
    if chunk_size <= 0:
        # A chunk size of zero makes the very first slice empty (nothing is
        # written) and a negative one slices relative to the end of the list,
        # so messages would be dropped without any error.
        raise ValueError('chunk_size must be a positive integer, got %r' % (chunk_size,))

    logging.info('######### IMPORTING MESSAGES STARTED #########\n')

    message_id = 0
    usermessage_id = 0
    recipient_id = 0  # Corresponding to stream "gitter"
    subject = 'imported from gitter'

    dump_file_id = 1
    low_index = 0

    while True:
        message_data = gitter_data[low_index:low_index + chunk_size]
        if not message_data:
            break

        zerver_message = []
        zerver_usermessage = []  # type: List[ZerverFieldsT]

        for message in message_data:
            message_time = dateutil.parser.parse(message['sent']).timestamp()
            mentioned_user_ids = get_usermentions(
                message, user_map, user_short_name_to_full_name)
            rendered_content = None
            user_id = user_map[message['fromUser']['id']]

            zulip_message = build_message(subject, float(message_time),
                                          message_id, message['text'],
                                          rendered_content, user_id,
                                          recipient_id)
            zerver_message.append(zulip_message)

            usermessage_id = build_usermessages(
                zerver_usermessage, usermessage_id, zerver_subscription,
                recipient_id, mentioned_user_ids, message_id)

            message_id += 1

        message_json = {
            'zerver_message': zerver_message,
            'zerver_usermessage': zerver_usermessage,
        }

        message_filename = os.path.join(
            output_dir, "messages-%06d.json" % (dump_file_id,))
        # Hand the filename to logging as an argument so the message is only
        # formatted when the record is actually emitted.
        logging.info("Writing Messages to %s\n", message_filename)
        write_data_to_file(message_filename, message_json)

        low_index += chunk_size
        dump_file_id += 1

    logging.info('######### IMPORTING MESSAGES FINISHED #########\n')
def test_build_zerver_message(self) -> None:
    """
    Check the usermessage rows build_usermessages creates for a non-private
    message sent to recipient 2, whose subscriber set has four users.
    """
    usermessages = []  # type: List[Dict[str, Any]]

    # recipient_id -> set of user_ids
    subscribers_by_recipient = {
        2: {3, 7, 15, 16},  # these we care about
        4: {12},
        6: {19, 21},
    }
    msg_id = 9

    # Taken before the call: the ids asserted below are relative to this one.
    um_id = NEXT_ID('user_message')

    build_usermessages(
        zerver_usermessage=usermessages,
        subscriber_map=subscribers_by_recipient,
        recipient_id=2,
        mentioned_user_ids=[7],
        message_id=msg_id,
        is_private=False,
    )

    row = usermessages[0]
    self.assertEqual(row['id'], um_id + 1)
    self.assertEqual(row['message'], msg_id)
    self.assertEqual(row['flags_mask'], 1)

    row = usermessages[1]
    self.assertEqual(row['id'], um_id + 2)
    self.assertEqual(row['message'], msg_id)
    self.assertEqual(row['user_profile'], 7)
    self.assertEqual(row['flags_mask'], 9)  # mentioned

    row = usermessages[2]
    self.assertEqual(row['id'], um_id + 3)
    self.assertEqual(row['message'], msg_id)

    row = usermessages[3]
    self.assertEqual(row['id'], um_id + 4)
    self.assertEqual(row['message'], msg_id)
def test_build_zerver_message(self) -> None:
    """
    Check the usermessage rows build_usermessages creates for a message sent
    to recipient 2, whose subscriber set has four users.
    """
    usermessages = []  # type: List[Dict[str, Any]]

    # recipient_id -> set of user_ids
    subscribers_by_recipient = {
        2: {3, 7, 15, 16},  # these we care about
        4: {12},
        6: {19, 21},
    }
    msg_id = 9

    # Taken before the call: the ids asserted below are relative to this one.
    um_id = NEXT_ID('user_message')

    build_usermessages(
        zerver_usermessage=usermessages,
        subscriber_map=subscribers_by_recipient,
        recipient_id=2,
        mentioned_user_ids=[7],
        message_id=msg_id,
    )

    row = usermessages[0]
    self.assertEqual(row['id'], um_id + 1)
    self.assertEqual(row['message'], msg_id)
    self.assertEqual(row['flags_mask'], 1)

    row = usermessages[1]
    self.assertEqual(row['id'], um_id + 2)
    self.assertEqual(row['message'], msg_id)
    self.assertEqual(row['user_profile'], 7)
    self.assertEqual(row['flags_mask'], 9)  # mentioned

    row = usermessages[2]
    self.assertEqual(row['id'], um_id + 3)
    self.assertEqual(row['message'], msg_id)

    row = usermessages[3]
    self.assertEqual(row['id'], um_id + 4)
    self.assertEqual(row['message'], msg_id)
def test_build_zerver_message(self) -> None:
    """
    Check the usermessage rows build_usermessages creates for one message.

    Four of the five subscriptions below belong to recipient 2, and the test
    expects four rows plus a returned usermessage id counter of 4.
    """
    rows = []  # type: List[Dict[str, Any]]
    first_usermessage_id = 0

    # (recipient, user_profile) pairs, in subscription order
    subscriptions = [
        dict(recipient=recipient, user_profile=user_profile)
        for recipient, user_profile in (
            (2, 7),
            (4, 12),
            (2, 16),
            (2, 15),
            (2, 3),
        )
    ]

    recipient = 2
    mentioned_ids = [12, 3, 16]
    msg_id = 9

    returned_usermessage_id = build_usermessages(
        rows, first_usermessage_id, subscriptions, recipient,
        mentioned_ids, msg_id)
    self.assertEqual(returned_usermessage_id, 4)

    head = rows[0]
    self.assertEqual(head['flags_mask'], 1)
    self.assertEqual(head['id'], 0)
    self.assertEqual(head['message'], msg_id)

    # The third subscription (user 16) is one of the mentioned users.
    mentioned_row = rows[1]
    self.assertEqual(mentioned_row['user_profile'],
                     subscriptions[2]['user_profile'])
    self.assertEqual(mentioned_row['flags_mask'], 9)

    tail = rows[3]
    self.assertEqual(tail['id'], 3)
    self.assertEqual(tail['message'], msg_id)
def channel_message_to_zerver_message(realm_id: int,
                                      users: List[ZerverFieldsT],
                                      added_users: AddedUsersT,
                                      added_recipient: AddedRecipientsT,
                                      all_messages: List[ZerverFieldsT],
                                      zerver_realmemoji: List[ZerverFieldsT],
                                      zerver_subscription: List[ZerverFieldsT],
                                      added_channels: AddedChannelsT,
                                      id_list: Tuple[int, int, int, int],
                                      domain_name: str) -> Tuple[List[ZerverFieldsT],
                                                                 List[ZerverFieldsT],
                                                                 List[ZerverFieldsT],
                                                                 List[ZerverFieldsT],
                                                                 List[ZerverFieldsT],
                                                                 Tuple[int, int, int, int]]:
    """
    Convert Slack channel messages into Zulip message records.

    Returns:
    1. zerver_message, which is a list of the messages
    2. zerver_usermessage, which is a list of the usermessages
    3. zerver_attachment, which is a list of the attachments
    4. uploads_list, which is a list of uploads to be mapped in uploads records.json
    5. reaction_list, which is a list of all user reactions
    6. id_list, which is a tuple of the message, usermessage, reaction and
       attachment id counters as they stand after this call
    """
    next_message_id, next_usermessage_id, next_reaction_id, next_attachment_id = id_list

    zerver_message = []
    zerver_usermessage = []  # type: List[ZerverFieldsT]
    uploads_list = []  # type: List[ZerverFieldsT]
    zerver_attachment = []  # type: List[ZerverFieldsT]
    reaction_list = []  # type: List[ZerverFieldsT]

    skipped_subtypes = (
        # Zulip doesn't have a pinned_item concept
        "pinned_item",
        "unpinned_item",
        # Slack's channel join/leave notices are spammy
        "channel_join",
        "channel_leave",
        "channel_name",
    )
    # Subtypes which have only the action in the message should
    # be rendered with '/me' in the content initially
    # For example "sh_room_created" has the message 'started a call'
    # which should be displayed as '/me started a call'
    me_subtypes = ("bot_add", "sh_room_created", "me_message")

    # For unicode emoji
    with open(NAME_TO_CODEPOINT_PATH) as fp:
        name_to_codepoint = ujson.load(fp)

    for message in all_messages:
        user = get_message_sending_user(message)
        if not user:
            # Ignore messages without user names
            # These are Sometimes produced by slack
            continue

        subtype = message.get('subtype', False)
        if subtype in skipped_subtypes:
            continue

        has_attachment = False
        has_image = False
        try:
            content, mentioned_users_id, has_link = convert_to_zulip_markdown(
                message['text'], users, added_channels, added_users)
        except Exception:
            print("Slack message unexpectedly missing text representation:")
            print(json.dumps(message, indent=4))
            continue

        rendered_content = None
        recipient_id = added_recipient[message['channel_name']]
        message_id = next_message_id

        # Process message reactions
        if 'reactions' in message:
            next_reaction_id = build_reactions(reaction_list, message['reactions'],
                                               added_users, message_id, next_reaction_id,
                                               name_to_codepoint, zerver_realmemoji)

        # Process different subtypes of slack messages
        if subtype in me_subtypes:
            content = '/me %s' % content

        # In Slack messages, uploads can either have the subtype as 'file_share' or
        # have the upload information in 'files' keyword
        if subtype == 'file_share':
            files = [message['file']]
        else:
            files = message.get('files', [])

        for fileinfo in files:
            url = fileinfo['url_private']

            if 'files.slack.com' not in url:
                # For attachments with link not from slack
                # Example: Google drive integration
                has_link = True
                file_name = fileinfo['title'] if 'title' in fileinfo else fileinfo['name']
                content = '[%s](%s)' % (file_name, url)
                continue

            # For attachments with slack download link
            has_attachment = True
            has_link = True
            has_image = 'image' in fileinfo['mimetype']

            matching_users = [candidate for candidate in users
                              if message['user'] == candidate['id']]
            file_user_email = get_user_email(matching_users[0], domain_name)

            # Note that this replaces the message content built so far.
            s3_path, content = get_attachment_path_and_content(fileinfo, realm_id)

            # construct attachments
            build_uploads(added_users[user], realm_id, file_user_email, fileinfo, s3_path,
                          uploads_list)

            build_attachment(realm_id, message_id, next_attachment_id, added_users[user],
                             fileinfo, s3_path, zerver_attachment)
            next_attachment_id += 1

        # construct message
        zerver_message.append(
            build_message('imported from slack', float(message['ts']), message_id, content,
                          rendered_content, added_users[user], recipient_id,
                          has_image, has_link, has_attachment))

        # construct usermessages
        next_usermessage_id = build_usermessages(
            zerver_usermessage, next_usermessage_id, zerver_subscription,
            recipient_id, mentioned_users_id, message_id)

        next_message_id += 1

    id_list = (next_message_id, next_usermessage_id, next_reaction_id,
               next_attachment_id)
    return (zerver_message, zerver_usermessage, zerver_attachment, uploads_list,
            reaction_list, id_list)
def channel_message_to_zerver_message(realm_id: int,
                                      users: List[ZerverFieldsT],
                                      added_users: AddedUsersT,
                                      added_recipient: AddedRecipientsT,
                                      all_messages: List[ZerverFieldsT],
                                      zerver_realmemoji: List[ZerverFieldsT],
                                      subscriber_map: Dict[int, Set[int]],
                                      added_channels: AddedChannelsT,
                                      domain_name: str) -> Tuple[List[ZerverFieldsT],
                                                                 List[ZerverFieldsT],
                                                                 List[ZerverFieldsT],
                                                                 List[ZerverFieldsT],
                                                                 List[ZerverFieldsT]]:
    """
    Convert Slack channel messages into Zulip message records.

    Returns:
    1. zerver_message, which is a list of the messages
    2. zerver_usermessage, which is a list of the usermessages
    3. zerver_attachment, which is a list of the attachments
    4. uploads_list, which is a list of uploads to be mapped in uploads records.json
    5. reaction_list, which is a list of all user reactions
    """
    zerver_message = []
    zerver_usermessage = []  # type: List[ZerverFieldsT]
    uploads_list = []  # type: List[ZerverFieldsT]
    zerver_attachment = []  # type: List[ZerverFieldsT]
    reaction_list = []  # type: List[ZerverFieldsT]

    skipped_subtypes = (
        # Zulip doesn't have a pinned_item concept
        "pinned_item",
        "unpinned_item",
        # Slack's channel join/leave notices are spammy
        "channel_join",
        "channel_leave",
        "channel_name",
    )
    # Subtypes which have only the action in the message should
    # be rendered with '/me' in the content initially
    # For example "sh_room_created" has the message 'started a call'
    # which should be displayed as '/me started a call'
    me_subtypes = ("bot_add", "sh_room_created", "me_message")

    # For unicode emoji
    with open(NAME_TO_CODEPOINT_PATH) as fp:
        name_to_codepoint = ujson.load(fp)

    for message in all_messages:
        user = get_message_sending_user(message)
        if not user:
            # Ignore messages without user names
            # These are Sometimes produced by slack
            continue

        subtype = message.get('subtype', False)
        if subtype in skipped_subtypes:
            continue

        try:
            content, mentioned_user_ids, has_link = convert_to_zulip_markdown(
                message['text'], users, added_channels, added_users)
        except Exception:
            print("Slack message unexpectedly missing text representation:")
            print(ujson.dumps(message, indent=4))
            continue

        rendered_content = None
        recipient_id = added_recipient[message['channel_name']]
        message_id = NEXT_ID('message')

        # Process message reactions
        if 'reactions' in message:
            build_reactions(reaction_list, message['reactions'], added_users,
                            message_id, name_to_codepoint, zerver_realmemoji)

        # Process different subtypes of slack messages
        if subtype in me_subtypes:
            content = '/me %s' % content

        if subtype == 'file_comment':
            # The file_comment message type only indicates the
            # responsible user in a subfield.
            message['user'] = message['comment']['user']

        file_info = process_message_files(
            message=message,
            domain_name=domain_name,
            realm_id=realm_id,
            message_id=message_id,
            user=user,
            users=users,
            added_users=added_users,
            zerver_attachment=zerver_attachment,
            uploads_list=uploads_list,
        )

        # Fold the file results into the message text and flags.
        content += file_info['content']
        has_link = has_link or file_info['has_link']
        has_attachment = file_info['has_attachment']
        has_image = file_info['has_image']

        # construct message
        zerver_message.append(
            build_message('imported from slack', float(message['ts']), message_id, content,
                          rendered_content, added_users[user], recipient_id,
                          has_image, has_link, has_attachment))

        # construct usermessages
        build_usermessages(
            zerver_usermessage=zerver_usermessage,
            subscriber_map=subscriber_map,
            recipient_id=recipient_id,
            mentioned_user_ids=mentioned_user_ids,
            message_id=message_id,
        )

    return (zerver_message, zerver_usermessage, zerver_attachment, uploads_list,
            reaction_list)
def channel_message_to_zerver_message(
        realm_id: int,
        users: List[ZerverFieldsT],
        slack_user_id_to_zulip_user_id: SlackToZulipUserIDT,
        slack_recipient_name_to_zulip_recipient_id: SlackToZulipRecipientT,
        all_messages: List[ZerverFieldsT],
        zerver_realmemoji: List[ZerverFieldsT],
        subscriber_map: Dict[int, Set[int]],
        added_channels: AddedChannelsT,
        dm_members: DMMembersT,
        domain_name: str,
        long_term_idle: Set[int]
) -> Tuple[List[ZerverFieldsT],
           List[ZerverFieldsT],
           List[ZerverFieldsT],
           List[ZerverFieldsT],
           List[ZerverFieldsT]]:
    """
    Convert Slack messages (channel, group DM and 1:1 DM) into Zulip
    message records.

    The recipient of a message is chosen by which of the keys
    'channel_name', 'mpim_name' or 'pm_name' it carries (checked in that
    order).  For a 'pm_name' message the recipient is the other member of
    the conversation, and usermessages are additionally built for the
    sender's own recipient when that differs.

    Returns:
    1. zerver_message, which is a list of the messages
    2. zerver_usermessage, which is a list of the usermessages
    3. zerver_attachment, which is a list of the attachments
    4. uploads_list, which is a list of uploads to be mapped in uploads records.json
    5. reaction_list, which is a list of all user reactions
    """
    zerver_message = []
    zerver_usermessage = []  # type: List[ZerverFieldsT]
    uploads_list = []  # type: List[ZerverFieldsT]
    zerver_attachment = []  # type: List[ZerverFieldsT]
    reaction_list = []  # type: List[ZerverFieldsT]

    # For unicode emoji
    with open(NAME_TO_CODEPOINT_PATH) as fp:
        name_to_codepoint = ujson.load(fp)

    total_user_messages = 0
    total_skipped_user_messages = 0
    for message in all_messages:
        slack_user_id = get_message_sending_user(message)
        if not slack_user_id:
            # Ignore messages without slack_user_id
            # These are Sometimes produced by slack
            continue

        subtype = message.get('subtype', False)
        if subtype in [
                # Zulip doesn't have a pinned_item concept
                "pinned_item",
                "unpinned_item",
                # Slack's channel join/leave notices are spammy
                "channel_join",
                "channel_leave",
                "channel_name"
        ]:
            continue

        try:
            content, mentioned_user_ids, has_link = convert_to_zulip_markdown(
                message['text'], users, added_channels, slack_user_id_to_zulip_user_id)
        except Exception:
            print("Slack message unexpectedly missing text representation:")
            print(ujson.dumps(message, indent=4))
            continue
        rendered_content = None

        # NOTE(review): a message carrying none of the three keys below falls
        # through with is_private / recipient_id left over from the previous
        # iteration (or unbound on the first one) -- confirm that the caller
        # always sets one of them.
        if "channel_name" in message:
            is_private = False
            recipient_id = slack_recipient_name_to_zulip_recipient_id[
                message['channel_name']]
        elif "mpim_name" in message:
            is_private = True
            recipient_id = slack_recipient_name_to_zulip_recipient_id[
                message['mpim_name']]
        elif "pm_name" in message:
            is_private = True
            sender = get_message_sending_user(message)
            members = dm_members[message['pm_name']]
            # The message goes to whichever member is not the sender.
            if sender == members[0]:
                recipient_id = slack_recipient_name_to_zulip_recipient_id[members[1]]
                sender_recipient_id = slack_recipient_name_to_zulip_recipient_id[members[0]]
            else:
                recipient_id = slack_recipient_name_to_zulip_recipient_id[members[0]]
                sender_recipient_id = slack_recipient_name_to_zulip_recipient_id[members[1]]

        message_id = NEXT_ID('message')

        if 'reactions' in message:
            build_reactions(reaction_list, message['reactions'],
                            slack_user_id_to_zulip_user_id,
                            message_id, name_to_codepoint, zerver_realmemoji)

        # Process different subtypes of slack messages

        # Subtypes which have only the action in the message should
        # be rendered with '/me' in the content initially
        # For example "sh_room_created" has the message 'started a call'
        # which should be displayed as '/me started a call'
        if subtype in ["bot_add", "sh_room_created", "me_message"]:
            content = '/me %s' % (content, )
        if subtype == 'file_comment':
            # The file_comment message type only indicates the
            # responsible user in a subfield.
            message['user'] = message['comment']['user']

        file_info = process_message_files(
            message=message,
            domain_name=domain_name,
            realm_id=realm_id,
            message_id=message_id,
            slack_user_id=slack_user_id,
            users=users,
            slack_user_id_to_zulip_user_id=slack_user_id_to_zulip_user_id,
            zerver_attachment=zerver_attachment,
            uploads_list=uploads_list,
        )

        content += file_info['content']
        has_link = has_link or file_info['has_link']

        has_attachment = file_info['has_attachment']
        has_image = file_info['has_image']

        topic_name = 'imported from slack'

        zulip_message = build_message(
            topic_name, float(message['ts']), message_id, content,
            rendered_content, slack_user_id_to_zulip_user_id[slack_user_id],
            recipient_id, has_image, has_link, has_attachment)
        zerver_message.append(zulip_message)

        (num_created, num_skipped) = build_usermessages(
            zerver_usermessage=zerver_usermessage,
            subscriber_map=subscriber_map,
            recipient_id=recipient_id,
            mentioned_user_ids=mentioned_user_ids,
            message_id=message_id,
            is_private=is_private,
            long_term_idle=long_term_idle,
        )
        total_user_messages += num_created
        total_skipped_user_messages += num_skipped

        # For a 1:1 DM, also build usermessages for the sender's own
        # recipient when it is a different recipient.
        if "pm_name" in message and recipient_id != sender_recipient_id:
            (num_created, num_skipped) = build_usermessages(
                zerver_usermessage=zerver_usermessage,
                subscriber_map=subscriber_map,
                recipient_id=sender_recipient_id,
                mentioned_user_ids=mentioned_user_ids,
                message_id=message_id,
                is_private=is_private,
                long_term_idle=long_term_idle,
            )
            total_user_messages += num_created
            total_skipped_user_messages += num_skipped

    # Pass the counters as logging arguments so the message is only
    # formatted when DEBUG records are actually emitted.
    logging.debug(
        "Created %s UserMessages; deferred %s due to long-term idle",
        total_user_messages, total_skipped_user_messages)

    return zerver_message, zerver_usermessage, zerver_attachment, uploads_list, \
        reaction_list
def channel_message_to_zerver_message(
        realm_id: int, users: List[ZerverFieldsT], added_users: AddedUsersT,
        added_recipient: AddedRecipientsT, all_messages: List[ZerverFieldsT],
        zerver_realmemoji: List[ZerverFieldsT],
        zerver_subscription: List[ZerverFieldsT],
        added_channels: AddedChannelsT, id_list: Tuple[int, int, int, int],
        domain_name: str
) -> Tuple[List[ZerverFieldsT], List[ZerverFieldsT], List[ZerverFieldsT],
           List[ZerverFieldsT], List[ZerverFieldsT], Tuple[int, int, int, int]]:
    """
    Convert Slack channel messages into Zulip message records.

    Returns:
    1. zerver_message, which is a list of the messages
    2. zerver_usermessage, which is a list of the usermessages
    3. zerver_attachment, which is a list of the attachments
    4. uploads_list, which is a list of uploads to be mapped in uploads records.json
    5. reaction_list, which is a list of all user reactions
    6. id_list, which is a tuple of the message, usermessage, reaction and
       attachment id counters as they stand after this call
    """
    next_message_id, next_usermessage_id, next_reaction_id, next_attachment_id = id_list

    zerver_message = []
    zerver_usermessage = []  # type: List[ZerverFieldsT]
    uploads_list = []  # type: List[ZerverFieldsT]
    zerver_attachment = []  # type: List[ZerverFieldsT]
    reaction_list = []  # type: List[ZerverFieldsT]

    skipped_subtypes = (
        # Zulip doesn't have a pinned_item concept
        "pinned_item",
        "unpinned_item",
        # Slack's channel join/leave notices are spammy
        "channel_join",
        "channel_leave",
        "channel_name",
    )
    # Subtypes which have only the action in the message should
    # be rendered with '/me' in the content initially
    # For example "sh_room_created" has the message 'started a call'
    # which should be displayed as '/me started a call'
    me_subtypes = ("bot_add", "sh_room_created", "me_message")

    # For unicode emoji
    with open(NAME_TO_CODEPOINT_PATH) as fp:
        name_to_codepoint = ujson.load(fp)

    for message in all_messages:
        user = get_message_sending_user(message)
        if not user:
            # Ignore messages without user names
            # These are Sometimes produced by slack
            continue

        subtype = message.get('subtype', False)
        if subtype in skipped_subtypes:
            continue

        has_attachment = False
        has_image = False
        try:
            content, mentioned_users_id, has_link = convert_to_zulip_markdown(
                message['text'], users, added_channels, added_users)
        except Exception:
            print("Slack message unexpectedly missing text representation:")
            print(json.dumps(message, indent=4))
            continue

        rendered_content = None
        recipient_id = added_recipient[message['channel_name']]
        message_id = next_message_id

        # Process message reactions
        if 'reactions' in message:
            next_reaction_id = build_reactions(
                reaction_list, message['reactions'], added_users, message_id,
                next_reaction_id, name_to_codepoint, zerver_realmemoji)

        # Process different subtypes of slack messages
        if subtype in me_subtypes:
            content = '/me %s' % content

        # In Slack messages, uploads can either have the subtype as 'file_share' or
        # have the upload information in 'files' keyword
        if subtype == 'file_share':
            files = [message['file']]
        else:
            files = message.get('files', [])

        for fileinfo in files:
            url = fileinfo['url_private']

            if 'files.slack.com' not in url:
                # For attachments with link not from slack
                # Example: Google drive integration
                has_link = True
                file_name = fileinfo['title'] if 'title' in fileinfo else fileinfo['name']
                content = '[%s](%s)' % (file_name, url)
                continue

            # For attachments with slack download link
            has_attachment = True
            has_link = True
            has_image = 'image' in fileinfo['mimetype']

            matching_users = [
                candidate for candidate in users
                if message['user'] == candidate['id']
            ]
            file_user_email = get_user_email(matching_users[0], domain_name)

            # Note that this replaces the message content built so far.
            s3_path, content = get_attachment_path_and_content(
                fileinfo, realm_id)

            # construct attachments
            build_uploads(added_users[user], realm_id, file_user_email,
                          fileinfo, s3_path, uploads_list)

            build_attachment(realm_id, message_id, next_attachment_id,
                             added_users[user], fileinfo, s3_path,
                             zerver_attachment)
            next_attachment_id += 1

        # construct message
        zulip_message = {
            'sending_client': 1,
            'rendered_content_version': 1,  # This is Zulip-specific
            'has_image': has_image,
            'subject': 'imported from slack',  # This is Zulip-specific
            'pub_date': float(message['ts']),
            'id': message_id,
            # attachment will be posted in the subsequent message;
            # this is how Slack does it, i.e. less like email
            'has_attachment': has_attachment,
            'edit_history': None,
            'sender': added_users[user],  # map slack id to zulip id
            'content': content,
            'rendered_content': rendered_content,  # slack doesn't cache this
            'recipient': recipient_id,
            'last_edit_time': None,
            'has_link': has_link,
        }
        zerver_message.append(zulip_message)

        # construct usermessages
        next_usermessage_id = build_usermessages(
            zerver_usermessage, next_usermessage_id, zerver_subscription,
            recipient_id, mentioned_users_id, message_id)

        next_message_id += 1

    id_list = (next_message_id, next_usermessage_id, next_reaction_id,
               next_attachment_id)
    return (zerver_message, zerver_usermessage, zerver_attachment, uploads_list,
            reaction_list, id_list)
def channel_message_to_zerver_message(realm_id: int,
                                      users: List[ZerverFieldsT],
                                      added_users: AddedUsersT,
                                      added_recipient: AddedRecipientsT,
                                      all_messages: List[ZerverFieldsT],
                                      zerver_realmemoji: List[ZerverFieldsT],
                                      subscriber_map: Dict[int, Set[int]],
                                      added_channels: AddedChannelsT,
                                      domain_name: str,
                                      long_term_idle: Set[int]) -> Tuple[List[ZerverFieldsT],
                                                                         List[ZerverFieldsT],
                                                                         List[ZerverFieldsT],
                                                                         List[ZerverFieldsT],
                                                                         List[ZerverFieldsT]]:
    """
    Convert Slack channel messages into Zulip message records.

    The created/skipped counts returned by build_usermessages are summed
    over all messages and reported in a single DEBUG log record at the end.

    Returns:
    1. zerver_message, which is a list of the messages
    2. zerver_usermessage, which is a list of the usermessages
    3. zerver_attachment, which is a list of the attachments
    4. uploads_list, which is a list of uploads to be mapped in uploads records.json
    5. reaction_list, which is a list of all user reactions
    """
    zerver_message = []
    zerver_usermessage = []  # type: List[ZerverFieldsT]
    uploads_list = []  # type: List[ZerverFieldsT]
    zerver_attachment = []  # type: List[ZerverFieldsT]
    reaction_list = []  # type: List[ZerverFieldsT]

    # For unicode emoji
    with open(NAME_TO_CODEPOINT_PATH) as fp:
        name_to_codepoint = ujson.load(fp)

    total_user_messages = 0
    total_skipped_user_messages = 0
    for message in all_messages:
        user = get_message_sending_user(message)
        if not user:
            # Ignore messages without user names
            # These are Sometimes produced by slack
            continue

        subtype = message.get('subtype', False)
        if subtype in [
                # Zulip doesn't have a pinned_item concept
                "pinned_item",
                "unpinned_item",
                # Slack's channel join/leave notices are spammy
                "channel_join",
                "channel_leave",
                "channel_name"
        ]:
            continue

        try:
            content, mentioned_user_ids, has_link = convert_to_zulip_markdown(
                message['text'], users, added_channels, added_users)
        except Exception:
            print("Slack message unexpectedly missing text representation:")
            print(ujson.dumps(message, indent=4))
            continue
        rendered_content = None

        recipient_id = added_recipient[message['channel_name']]
        message_id = NEXT_ID('message')

        # Process message reactions
        if 'reactions' in message:
            build_reactions(reaction_list, message['reactions'], added_users,
                            message_id, name_to_codepoint,
                            zerver_realmemoji)

        # Process different subtypes of slack messages

        # Subtypes which have only the action in the message should
        # be rendered with '/me' in the content initially
        # For example "sh_room_created" has the message 'started a call'
        # which should be displayed as '/me started a call'
        if subtype in ["bot_add", "sh_room_created", "me_message"]:
            content = ('/me %s' % (content))
        if subtype == 'file_comment':
            # The file_comment message type only indicates the
            # responsible user in a subfield.
            message['user'] = message['comment']['user']

        file_info = process_message_files(
            message=message,
            domain_name=domain_name,
            realm_id=realm_id,
            message_id=message_id,
            user=user,
            users=users,
            added_users=added_users,
            zerver_attachment=zerver_attachment,
            uploads_list=uploads_list,
        )

        content += file_info['content']
        has_link = has_link or file_info['has_link']

        has_attachment = file_info['has_attachment']
        has_image = file_info['has_image']

        # construct message
        topic_name = 'imported from slack'

        zulip_message = build_message(topic_name, float(message['ts']), message_id, content,
                                      rendered_content, added_users[user], recipient_id,
                                      has_image, has_link, has_attachment)
        zerver_message.append(zulip_message)

        # construct usermessages
        (num_created, num_skipped) = build_usermessages(
            zerver_usermessage=zerver_usermessage,
            subscriber_map=subscriber_map,
            recipient_id=recipient_id,
            mentioned_user_ids=mentioned_user_ids,
            message_id=message_id,
            long_term_idle=long_term_idle,
        )
        total_user_messages += num_created
        total_skipped_user_messages += num_skipped

    # Pass the counters as logging arguments so the message is only
    # formatted when DEBUG records are actually emitted.
    logging.debug("Created %s UserMessages; deferred %s due to long-term idle",
                  total_user_messages, total_skipped_user_messages)

    return zerver_message, zerver_usermessage, zerver_attachment, uploads_list, \
        reaction_list