예제 #1
0
 def test_file_name(self) -> None:
     """sanitize_name() keeps already-safe names (dotfiles, multi-part
     extensions, non-ASCII text) intact and strips unsafe characters."""
     cases = [
         (u'test.txt', u'test.txt'),
         (u'.hidden', u'.hidden'),
         (u'.hidden.txt', u'.hidden.txt'),
         (u'tarball.tar.gz', u'tarball.tar.gz'),
         (u'.hidden_tarball.tar.gz', u'.hidden_tarball.tar.gz'),
         (u'Testing{}*&*#().ta&&%$##&&r.gz', u'Testing.tar.gz'),
         (u'*testingfile?*.txt', u'testingfile.txt'),
         (u'snowman☃.txt', u'snowman.txt'),
         (u'테스트.txt', u'테스트.txt'),
         (u'~/."\`\?*"u0`000ssh/test.t**{}ar.gz', u'.u0000sshtest.tar.gz'),
     ]
     for raw, expected in cases:
         self.assertEqual(sanitize_name(raw), expected)
예제 #2
0
 def test_file_name(self) -> None:
     """sanitize_name() should pass through already-safe names (including
     dotfiles, multi-part extensions, and non-ASCII text) unchanged, and
     strip path separators, shell metacharacters, and other unsafe
     characters from the rest."""
     self.assertEqual(sanitize_name(u'test.txt'), u'test.txt')
     self.assertEqual(sanitize_name(u'.hidden'), u'.hidden')
     self.assertEqual(sanitize_name(u'.hidden.txt'), u'.hidden.txt')
     self.assertEqual(sanitize_name(u'tarball.tar.gz'), u'tarball.tar.gz')
     self.assertEqual(sanitize_name(u'.hidden_tarball.tar.gz'), u'.hidden_tarball.tar.gz')
     self.assertEqual(sanitize_name(u'Testing{}*&*#().ta&&%$##&&r.gz'), u'Testing.tar.gz')
     self.assertEqual(sanitize_name(u'*testingfile?*.txt'), u'testingfile.txt')
     self.assertEqual(sanitize_name(u'snowman☃.txt'), u'snowman.txt')
     self.assertEqual(sanitize_name(u'테스트.txt'), u'테스트.txt')
     self.assertEqual(sanitize_name(u'~/."\`\?*"u0`000ssh/test.t**{}ar.gz'), u'.u0000sshtest.tar.gz')
예제 #3
0
    def test_file_name(self) -> None:
        """
        Unicode filenames should be processed correctly.
        """
        self.login(self.example_email("hamlet"))
        for name in ["Здравейте.txt", "test"]:
            # Upload a dummy file whose name is percent-encoded, the way
            # it would arrive from a client.
            dummy_file = StringIO("bah!")
            dummy_file.name = urllib.parse.quote(name)

            result = self.client_post("/json/user_uploads", {'f1': dummy_file})
            # The returned URI should contain the sanitized original name.
            assert sanitize_name(name) in result.json()['uri']
예제 #4
0
    def test_file_name(self) -> None:
        """
        Unicode filenames should be processed correctly.
        """
        self.login(self.example_email("hamlet"))
        for expected in ["Здравейте.txt", "test"]:
            # The filename is percent-encoded before upload, mimicking how
            # a client would transmit it.
            fp = StringIO("bah!")
            fp.name = urllib.parse.quote(expected)

            result = self.client_post("/json/user_uploads", {'f1': fp})
            # The returned URI should contain the sanitized original name.
            assert sanitize_name(expected) in result.json()['uri']
예제 #5
0
def get_attachment_path_and_content(fileinfo: ZerverFieldsT, realm_id: int) -> Tuple[str,
                                                                                     str]:
    """Return (s3_path, markdown_content) for an imported attachment.

    The path layout must be kept in sync with its equivalent in
    zerver/lib/uploads in the function 'upload_message_image'.
    """
    path_components = [
        str(realm_id),
        format(random.randint(0, 255), 'x'),
        random_name(18),
        sanitize_name(fileinfo['name']),
    ]
    s3_path = "/".join(path_components)
    attachment_path = '/user_uploads/%s' % (s3_path,)
    # Markdown link displayed in the imported message body.
    content = '[%s](%s)' % (fileinfo['title'], attachment_path)
    return s3_path, content
예제 #6
0
def get_attachment_path_and_content(fileinfo: ZerverFieldsT,
                                    realm_id: int) -> Tuple[str, str]:
    """Return (s3_path, markdown_content) for an imported attachment.

    The path layout is <realm_id>/<random hex byte>/<random_name(18)>/<sanitized name>.
    """
    # Should be kept in sync with its equivalent in zerver/lib/uploads in the function
    # 'upload_message_image'
    s3_path = "/".join([
        str(realm_id),
        format(random.randint(0, 255), 'x'),
        random_name(18),
        sanitize_name(fileinfo['name'])
    ])
    # Markdown link displayed in the imported message body.
    attachment_path = ('/user_uploads/%s' % (s3_path))
    content = '[%s](%s)' % (fileinfo['title'], attachment_path)

    return s3_path, content
예제 #7
0
def get_attachment_path_and_content(fileinfo: ZerverFieldsT, realm_id: int) -> Tuple[str,
                                                                                     str]:
    """Return (s3_path, markdown_content) for a Slack-imported attachment.

    The path includes a 'SlackImportAttachment' marker segment.
    """
    # Should be kept in sync with its equivalent in zerver/lib/uploads in the function
    # 'upload_message_file'
    s3_path = "/".join([
        str(realm_id),
        'SlackImportAttachment',  # This is a special placeholder which should be kept
                                  # in sync with 'exports.py' function 'import_message_data'
        format(random.randint(0, 255), 'x'),
        random_name(18),
        sanitize_name(fileinfo['name'])
    ])
    # Markdown link displayed in the imported message body.
    attachment_path = ('/user_uploads/%s' % (s3_path))
    content = '[%s](%s)' % (fileinfo['title'], attachment_path)

    return s3_path, content
예제 #8
0
파일: slack.py 프로젝트: kyoki/zulip
def get_attachment_path_and_content(fileinfo: ZerverFieldsT, realm_id: int) -> Tuple[str,
                                                                                     str]:
    """Return (s3_path, markdown_content) for a Slack-imported attachment.

    Path layout must stay in sync with 'upload_message_file' in
    zerver/lib/uploads; the 'SlackImportAttachment' segment is a special
    placeholder kept in sync with 'import_message_data' in exports.py.
    """
    path_components = [
        str(realm_id),
        'SlackImportAttachment',
        format(random.randint(0, 255), 'x'),
        random_name(18),
        sanitize_name(fileinfo['name']),
    ]
    s3_path = "/".join(path_components)
    attachment_path = '/user_uploads/%s' % (s3_path,)
    # Markdown link displayed in the imported message body.
    content = '[%s](%s)' % (fileinfo['title'], attachment_path)
    return s3_path, content
예제 #9
0
파일: slack.py 프로젝트: yushao2/zulip
def get_attachment_path_and_content(fileinfo: ZerverFieldsT, realm_id: int) -> Tuple[str, str]:
    """Return (s3_path, markdown_content) for a Slack-imported attachment.

    Uses secrets.token_urlsafe for the random path segment.
    """
    # Should be kept in sync with its equivalent in zerver/lib/uploads in the function
    # 'upload_message_file'
    s3_path = "/".join(
        [
            str(realm_id),
            "SlackImportAttachment",  # This is a special placeholder which should be kept
            # in sync with 'exports.py' function 'import_message_data'
            format(random.randint(0, 255), "x"),
            secrets.token_urlsafe(18),
            sanitize_name(fileinfo["name"]),
        ]
    )
    # Markdown link displayed in the imported message body.
    attachment_path = f"/user_uploads/{s3_path}"
    content = "[{}]({})".format(fileinfo["title"], attachment_path)

    return s3_path, content
예제 #10
0
def import_uploads_s3(bucket_name: str,
                      import_dir: Path,
                      processing_avatars: bool = False,
                      processing_emojis: bool = False) -> None:
    """Upload exported files (attachments, avatars, or emojis) to S3.

    Reads records.json from import_dir, remaps realm and user profile IDs
    to the new server's IDs, and uploads each file under the key layout
    used by zerver/lib/uploads.

    Args:
        bucket_name: destination S3 bucket.
        import_dir: export directory containing records.json and the files.
        processing_avatars: records are user avatars, whose paths must be
            rehashed with the new server's avatar salt.
        processing_emojis: records are custom emoji images.
    """
    upload_backend = S3UploadBackend()
    conn = S3Connection(settings.S3_KEY, settings.S3_SECRET_KEY)
    bucket = conn.get_bucket(bucket_name, validate=True)

    records_filename = os.path.join(import_dir, "records.json")
    with open(records_filename) as records_file:
        records = ujson.loads(records_file.read())

    re_map_foreign_keys_internal(records,
                                 'records',
                                 'realm_id',
                                 related_table="realm",
                                 id_field=True)
    re_map_foreign_keys_internal(records,
                                 'records',
                                 'user_profile_id',
                                 related_table="user_profile",
                                 id_field=True)
    for record in records:
        key = Key(bucket)

        if processing_avatars:
            # For avatars, we need to rehash the user's email with the
            # new server's avatar salt
            avatar_path = user_avatar_path_from_ids(record['user_profile_id'],
                                                    record['realm_id'])
            key.key = avatar_path
            if record['s3_path'].endswith('.original'):
                key.key += '.original'
        elif processing_emojis:
            # Must be elif (not a separate if): otherwise, when importing
            # avatars, the attachment branch below would run and clobber
            # key.key with an attachment-style path.
            # For emojis we follow the function 'upload_emoji_image'
            emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format(
                realm_id=record['realm_id'],
                emoji_file_name=record['file_name'])
            key.key = emoji_path
        else:
            # Should be kept in sync with its equivalent in zerver/lib/uploads in the
            # function 'upload_message_image'
            s3_file_name = "/".join([
                str(record['realm_id']),
                random_name(18),
                sanitize_name(os.path.basename(record['path']))
            ])
            key.key = s3_file_name
            path_maps['attachment_path'][record['path']] = s3_file_name

        user_profile_id = int(record['user_profile_id'])
        # Support email gateway bot and other cross-realm messages
        if user_profile_id in id_maps["user_profile"]:
            logging.info("Uploaded by ID mapped user: %s!", user_profile_id)
            user_profile_id = id_maps["user_profile"][user_profile_id]
        user_profile = get_user_profile_by_id(user_profile_id)
        key.set_metadata("user_profile_id", str(user_profile.id))
        key.set_metadata("realm_id", str(user_profile.realm_id))
        key.set_metadata("orig_last_modified", record['last_modified'])

        headers = {'Content-Type': record['content_type']}

        key.set_contents_from_filename(os.path.join(import_dir,
                                                    record['path']),
                                       headers=headers)

        if processing_avatars:
            # TODO: Ideally, we'd do this in a separate pass, after
            # all the avatars have been uploaded, since we may end up
            # unnecssarily resizing images just before the medium-size
            # image in the export is uploaded.  See the local uplods
            # code path for more notes.
            upload_backend.ensure_medium_avatar_image(
                user_profile=user_profile)
예제 #11
0
def import_uploads_local(import_dir: Path,
                         processing_avatars: bool = False,
                         processing_emojis: bool = False) -> None:
    """Copy exported files (attachments, avatars, or emojis) into
    LOCAL_UPLOADS_DIR.

    Reads records.json from import_dir, remaps realm (and, for non-emoji
    imports, user profile) IDs to the new server's IDs, and copies each
    file into the path layout used by zerver/lib/uploads.

    Args:
        import_dir: export directory containing records.json and the files.
        processing_avatars: records are user avatars, whose paths must be
            rehashed with the new server's avatar salt.
        processing_emojis: records are custom emoji images.
    """
    records_filename = os.path.join(import_dir, "records.json")
    with open(records_filename) as records_file:
        records = ujson.loads(records_file.read())

    re_map_foreign_keys_internal(records,
                                 'records',
                                 'realm_id',
                                 related_table="realm",
                                 id_field=True)
    if not processing_emojis:
        # Emoji records may lack user_profile_id data (e.g. third-party
        # exports), so only remap it for avatar/attachment imports.
        re_map_foreign_keys_internal(records,
                                     'records',
                                     'user_profile_id',
                                     related_table="user_profile",
                                     id_field=True)
    for record in records:
        if processing_avatars:
            # For avatars, we need to rehash the user ID with the
            # new server's avatar salt
            avatar_path = user_avatar_path_from_ids(record['user_profile_id'],
                                                    record['realm_id'])
            file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars",
                                     avatar_path)
            if record['s3_path'].endswith('.original'):
                file_path += '.original'
            else:
                file_path += '.png'
        elif processing_emojis:
            # For emojis we follow the function 'upload_emoji_image'
            emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format(
                realm_id=record['realm_id'],
                emoji_file_name=record['file_name'])
            file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars",
                                     emoji_path)
        else:
            # Should be kept in sync with its equivalent in zerver/lib/uploads in the
            # function 'upload_message_image'
            s3_file_name = "/".join([
                str(record['realm_id']),
                random_name(18),
                sanitize_name(os.path.basename(record['path']))
            ])
            file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "files",
                                     s3_file_name)
            path_maps['attachment_path'][record['path']] = s3_file_name

        orig_file_path = os.path.join(import_dir, record['path'])
        # os.makedirs is race-free with exist_ok=True and avoids the
        # overhead of shelling out to `mkdir -p`.
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        shutil.copy(orig_file_path, file_path)

    if processing_avatars:
        # Ensure that we have medium-size avatar images for every
        # avatar.  TODO: This implementation is hacky, both in that it
        # does get_user_profile_by_id for each user, and in that it
        # might be better to require the export to just have these.
        upload_backend = LocalUploadBackend()
        for record in records:
            if record['s3_path'].endswith('.original'):
                user_profile = get_user_profile_by_id(
                    record['user_profile_id'])
                avatar_path = user_avatar_path_from_ids(
                    user_profile.id, record['realm_id'])
                medium_file_path = os.path.join(settings.LOCAL_UPLOADS_DIR,
                                                "avatars",
                                                avatar_path) + '-medium.png'
                if os.path.exists(medium_file_path):
                    # We remove the image here primarily to deal with
                    # issues when running the import script multiple
                    # times in development (where one might reuse the
                    # same realm ID from a previous iteration).
                    os.remove(medium_file_path)
                upload_backend.ensure_medium_avatar_image(
                    user_profile=user_profile)
예제 #12
0
def import_uploads_s3(bucket_name: str, import_dir: Path, processing_avatars: bool=False,
                      processing_emojis: bool=False) -> None:
    """Upload exported files (attachments, avatars, or emojis) to S3.

    Reads records.json from import_dir, remaps realm and user profile IDs
    to the new server's IDs, and uploads each file under the key layout
    used by zerver/lib/uploads.
    """
    upload_backend = S3UploadBackend()
    conn = S3Connection(settings.S3_KEY, settings.S3_SECRET_KEY)
    bucket = conn.get_bucket(bucket_name, validate=True)

    records_filename = os.path.join(import_dir, "records.json")
    with open(records_filename) as records_file:
        records = ujson.loads(records_file.read())

    re_map_foreign_keys_internal(records, 'records', 'realm_id', related_table="realm",
                                 id_field=True)
    re_map_foreign_keys_internal(records, 'records', 'user_profile_id',
                                 related_table="user_profile", id_field=True)
    for record in records:
        key = Key(bucket)

        if processing_avatars:
            # For avatars, we need to rehash the user's email with the
            # new server's avatar salt
            avatar_path = user_avatar_path_from_ids(record['user_profile_id'], record['realm_id'])
            key.key = avatar_path
            if record['s3_path'].endswith('.original'):
                key.key += '.original'
        elif processing_emojis:
            # Must be elif (not a separate if): otherwise, when importing
            # avatars, the attachment branch below would run and clobber
            # key.key with an attachment-style path.
            # For emojis we follow the function 'upload_emoji_image'
            emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format(
                realm_id=record['realm_id'],
                emoji_file_name=record['file_name'])
            key.key = emoji_path
        else:
            # Should be kept in sync with its equivalent in zerver/lib/uploads in the
            # function 'upload_message_image'
            s3_file_name = "/".join([
                str(record['realm_id']),
                random_name(18),
                sanitize_name(os.path.basename(record['path']))
            ])
            key.key = s3_file_name
            path_maps['attachment_path'][record['path']] = s3_file_name

        user_profile_id = int(record['user_profile_id'])
        # Support email gateway bot and other cross-realm messages
        if user_profile_id in id_maps["user_profile"]:
            logging.info("Uploaded by ID mapped user: %s!", user_profile_id)
            user_profile_id = id_maps["user_profile"][user_profile_id]
        user_profile = get_user_profile_by_id(user_profile_id)
        key.set_metadata("user_profile_id", str(user_profile.id))
        key.set_metadata("realm_id", str(user_profile.realm_id))
        key.set_metadata("orig_last_modified", record['last_modified'])

        headers = {'Content-Type': record['content_type']}

        key.set_contents_from_filename(os.path.join(import_dir, record['path']), headers=headers)

        if processing_avatars:
            # TODO: Ideally, we'd do this in a separate pass, after
            # all the avatars have been uploaded, since we may end up
            # unnecssarily resizing images just before the medium-size
            # image in the export is uploaded.  See the local uplods
            # code path for more notes.
            upload_backend.ensure_medium_avatar_image(user_profile=user_profile)
예제 #13
0
def import_uploads_local(import_dir: Path, processing_avatars: bool=False,
                         processing_emojis: bool=False) -> None:
    """Copy exported files (attachments, avatars, or emojis) into
    LOCAL_UPLOADS_DIR.

    Reads records.json from import_dir, remaps realm (and, for non-emoji
    imports, user profile) IDs to the new server's IDs, and copies each
    file into the path layout used by zerver/lib/uploads.
    """
    records_filename = os.path.join(import_dir, "records.json")
    with open(records_filename) as records_file:
        records = ujson.loads(records_file.read())

    re_map_foreign_keys_internal(records, 'records', 'realm_id', related_table="realm",
                                 id_field=True)
    if not processing_emojis:
        # Emoji records may lack user_profile_id data (e.g. third-party
        # exports), so only remap it for avatar/attachment imports.
        re_map_foreign_keys_internal(records, 'records', 'user_profile_id',
                                     related_table="user_profile", id_field=True)
    for record in records:
        if processing_avatars:
            # For avatars, we need to rehash the user ID with the
            # new server's avatar salt
            avatar_path = user_avatar_path_from_ids(record['user_profile_id'], record['realm_id'])
            file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars", avatar_path)
            if record['s3_path'].endswith('.original'):
                file_path += '.original'
            else:
                file_path += '.png'
        elif processing_emojis:
            # For emojis we follow the function 'upload_emoji_image'
            emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format(
                realm_id=record['realm_id'],
                emoji_file_name=record['file_name'])
            file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars", emoji_path)
        else:
            # Should be kept in sync with its equivalent in zerver/lib/uploads in the
            # function 'upload_message_image'
            s3_file_name = "/".join([
                str(record['realm_id']),
                random_name(18),
                sanitize_name(os.path.basename(record['path']))
            ])
            file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "files", s3_file_name)
            path_maps['attachment_path'][record['path']] = s3_file_name

        orig_file_path = os.path.join(import_dir, record['path'])
        # os.makedirs is race-free with exist_ok=True and avoids the
        # overhead of shelling out to `mkdir -p`.
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        shutil.copy(orig_file_path, file_path)

    if processing_avatars:
        # Ensure that we have medium-size avatar images for every
        # avatar.  TODO: This implementation is hacky, both in that it
        # does get_user_profile_by_id for each user, and in that it
        # might be better to require the export to just have these.
        upload_backend = LocalUploadBackend()
        for record in records:
            if record['s3_path'].endswith('.original'):
                user_profile = get_user_profile_by_id(record['user_profile_id'])
                avatar_path = user_avatar_path_from_ids(user_profile.id, record['realm_id'])
                medium_file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars",
                                                avatar_path) + '-medium.png'
                if os.path.exists(medium_file_path):
                    # We remove the image here primarily to deal with
                    # issues when running the import script multiple
                    # times in development (where one might reuse the
                    # same realm ID from a previous iteration).
                    os.remove(medium_file_path)
                upload_backend.ensure_medium_avatar_image(user_profile=user_profile)
예제 #14
0
def process_message_attachments(
    attachments: List[Dict[str, Any]],
    realm_id: int,
    message_id: int,
    user_id: int,
    user_handler: UserHandler,
    zerver_attachment: List[ZerverFieldsT],
    uploads_list: List[ZerverFieldsT],
    mattermost_data_dir: str,
    output_dir: str,
) -> Tuple[str, bool]:
    """Copy each Mattermost attachment into the export tree, recording the
    upload and attachment rows, and return (markdown_links, has_image)."""
    has_image = False
    markdown_links = []

    for attachment in attachments:
        relative_path = attachment["path"]
        source_path = os.path.join(mattermost_data_dir, "data", relative_path)

        file_name = relative_path.split("/")[-1]
        extension = f'.{file_name.split(".")[-1]}'
        if extension.lower() in IMAGE_EXTENSIONS:
            has_image = True

        # Random, collision-resistant storage path for the file.
        s3_path = "/".join(
            [
                str(realm_id),
                format(random.randint(0, 255), "x"),
                secrets.token_urlsafe(18),
                sanitize_name(file_name),
            ]
        )
        markdown_links.append(f"[{file_name}](/user_uploads/{s3_path})")

        fileinfo = {
            "name": file_name,
            "size": os.path.getsize(source_path),
            "created": os.path.getmtime(source_path),
        }

        uploads_list.append(
            dict(
                path=s3_path,
                realm_id=realm_id,
                content_type=None,
                user_profile_id=user_id,
                last_modified=fileinfo["created"],
                user_profile_email=user_handler.get_user(user_id=user_id)["email"],
                s3_path=s3_path,
                size=fileinfo["size"],
            )
        )

        build_attachment(
            realm_id=realm_id,
            message_ids={message_id},
            user_id=user_id,
            fileinfo=fileinfo,
            s3_path=s3_path,
            zerver_attachment=zerver_attachment,
        )

        # Mirror the attachment file into the export's uploads/ directory.
        destination_path = os.path.join(output_dir, "uploads", s3_path)
        os.makedirs(os.path.dirname(destination_path), exist_ok=True)
        shutil.copyfile(source_path, destination_path)

    return "\n".join(markdown_links), has_image
예제 #15
0
def process_message_attachment(
    upload: Dict[str, Any],
    realm_id: int,
    message_id: int,
    user_id: int,
    user_handler: UserHandler,
    zerver_attachment: List[ZerverFieldsT],
    uploads_list: List[ZerverFieldsT],
    upload_id_to_upload_data_map: Dict[str, Dict[str, Any]],
    output_dir: str,
) -> Tuple[str, bool]:
    """Reassemble one uploaded file from its stored chunks into the export
    tree, record the upload and attachment rows, and return
    (attachment_content, has_image)."""
    file_data = upload_id_to_upload_data_map[upload["_id"]]
    file_name = upload["name"]
    extension = f'.{upload["type"].split("/")[-1]}'

    # The MIME subtype doubles as the extension check for images.
    has_image = extension.lower() in IMAGE_EXTENSIONS

    # Random, collision-resistant storage path for the file.
    s3_path = "/".join([
        str(realm_id),
        format(random.randint(0, 255), "x"),
        secrets.token_urlsafe(18),
        sanitize_name(file_name),
    ])

    # Build the attachment from chunks and save it to s3_path.
    destination = os.path.join(output_dir, "uploads", s3_path)
    os.makedirs(os.path.dirname(destination), exist_ok=True)
    with open(destination, "wb") as out_file:
        out_file.write(b"".join(file_data["chunk"]))

    attachment_content = (
        f'{file_data["description"]}\n\n[{file_name}](/user_uploads/{s3_path})'
    )

    fileinfo = {
        "name": file_name,
        "size": file_data["size"],
        "created": float(file_data["_updatedAt"].timestamp()),
    }

    uploads_list.append(
        dict(
            path=s3_path,
            realm_id=realm_id,
            content_type=upload["type"],
            user_profile_id=user_id,
            last_modified=fileinfo["created"],
            user_profile_email=user_handler.get_user(user_id=user_id)["email"],
            s3_path=s3_path,
            size=fileinfo["size"],
        )
    )

    build_attachment(
        realm_id=realm_id,
        message_ids={message_id},
        user_id=user_id,
        fileinfo=fileinfo,
        s3_path=s3_path,
        zerver_attachment=zerver_attachment,
    )

    return attachment_content, has_image
예제 #16
0
def import_uploads_s3(bucket_name: str, import_dir: Path, processing_avatars: bool=False,
                      processing_emojis: bool=False) -> None:
    """Upload exported files (attachments, avatars, or custom emojis) to S3.

    Reads records.json from import_dir, remaps realm (and, except for
    emojis, user profile) IDs to the new server's IDs, and uploads each
    file under the key layout used by zerver/lib/uploads.
    """
    upload_backend = S3UploadBackend()
    conn = S3Connection(settings.S3_KEY, settings.S3_SECRET_KEY)
    bucket = conn.get_bucket(bucket_name, validate=True)

    records_filename = os.path.join(import_dir, "records.json")
    with open(records_filename) as records_file:
        records = ujson.loads(records_file.read())

    re_map_foreign_keys_internal(records, 'records', 'realm_id', related_table="realm",
                                 id_field=True)
    # Fallback last-modified time for emoji records, which get it assigned
    # below rather than carrying one from the export.
    timestamp = datetime_to_timestamp(timezone_now())
    if not processing_emojis:
        re_map_foreign_keys_internal(records, 'records', 'user_profile_id',
                                     related_table="user_profile", id_field=True)
    for record in records:
        key = Key(bucket)

        if processing_avatars:
            # For avatars, we need to rehash the user's email with the
            # new server's avatar salt
            avatar_path = user_avatar_path_from_ids(record['user_profile_id'], record['realm_id'])
            key.key = avatar_path
            if record['s3_path'].endswith('.original'):
                key.key += '.original'
        elif processing_emojis:
            # For emojis we follow the function 'upload_emoji_image'
            emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format(
                realm_id=record['realm_id'],
                emoji_file_name=record['file_name'])
            key.key = emoji_path
            record['last_modified'] = timestamp
        else:
            # Should be kept in sync with its equivalent in zerver/lib/uploads in the
            # function 'upload_message_image'
            s3_file_name = "/".join([
                str(record['realm_id']),
                random_name(18),
                sanitize_name(os.path.basename(record['path']))
            ])
            key.key = s3_file_name
            path_maps['attachment_path'][record['s3_path']] = s3_file_name

        # Exported custom emoji from tools like Slack don't have
        # the data for what user uploaded them in `user_profile_id`.
        if not processing_emojis:
            user_profile_id = int(record['user_profile_id'])
            # Support email gateway bot and other cross-realm messages
            if user_profile_id in id_maps["user_profile"]:
                logging.info("Uploaded by ID mapped user: %s!" % (user_profile_id,))
                user_profile_id = id_maps["user_profile"][user_profile_id]
            user_profile = get_user_profile_by_id(user_profile_id)
            key.set_metadata("user_profile_id", str(user_profile.id))

        key.set_metadata("orig_last_modified", record['last_modified'])
        key.set_metadata("realm_id", str(record['realm_id']))

        # Zulip exports will always have a content-type, but third-party exports might not.
        content_type = record.get("content_type", guess_type(record['s3_path'])[0])
        headers = {'Content-Type': content_type}

        key.set_contents_from_filename(os.path.join(import_dir, record['path']), headers=headers)

    if processing_avatars:
        # Ensure that we have medium-size avatar images for every
        # avatar.  TODO: This implementation is hacky, both in that it
        # does get_user_profile_by_id for each user, and in that it
        # might be better to require the export to just have these.
        upload_backend = S3UploadBackend()
        for record in records:
            if record['s3_path'].endswith('.original'):
                user_profile = get_user_profile_by_id(record['user_profile_id'])
                upload_backend.ensure_medium_avatar_image(user_profile=user_profile)
예제 #17
0
def import_uploads_s3(bucket_name: str, import_dir: Path, processing_avatars: bool=False,
                      processing_emojis: bool=False) -> None:
    """Upload exported files (attachments, avatars, or custom emojis) to S3.

    Reads records.json from import_dir, remaps realm (and, except for
    emojis, user profile) IDs to the new server's IDs, and uploads each
    file under the key layout used by zerver/lib/uploads.
    """
    upload_backend = S3UploadBackend()
    conn = S3Connection(settings.S3_KEY, settings.S3_SECRET_KEY)
    bucket = conn.get_bucket(bucket_name, validate=True)

    records_filename = os.path.join(import_dir, "records.json")
    with open(records_filename) as records_file:
        records = ujson.loads(records_file.read())

    re_map_foreign_keys_internal(records, 'records', 'realm_id', related_table="realm",
                                 id_field=True)
    # Fallback last-modified time for emoji records, which get it assigned
    # below rather than carrying one from the export.
    timestamp = datetime_to_timestamp(timezone_now())
    if not processing_emojis:
        re_map_foreign_keys_internal(records, 'records', 'user_profile_id',
                                     related_table="user_profile", id_field=True)
    for record in records:
        key = Key(bucket)

        if processing_avatars:
            # For avatars, we need to rehash the user's email with the
            # new server's avatar salt
            avatar_path = user_avatar_path_from_ids(record['user_profile_id'], record['realm_id'])
            key.key = avatar_path
            if record['s3_path'].endswith('.original'):
                key.key += '.original'
        elif processing_emojis:
            # For emojis we follow the function 'upload_emoji_image'
            emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format(
                realm_id=record['realm_id'],
                emoji_file_name=record['file_name'])
            key.key = emoji_path
            record['last_modified'] = timestamp
        else:
            # Should be kept in sync with its equivalent in zerver/lib/uploads in the
            # function 'upload_message_image'
            s3_file_name = "/".join([
                str(record['realm_id']),
                random_name(18),
                sanitize_name(os.path.basename(record['path']))
            ])
            key.key = s3_file_name
            path_maps['attachment_path'][record['s3_path']] = s3_file_name

        # Exported custom emoji from tools like Slack don't have
        # the data for what user uploaded them in `user_profile_id`.
        if not processing_emojis:
            user_profile_id = int(record['user_profile_id'])
            # Support email gateway bot and other cross-realm messages
            if user_profile_id in id_maps["user_profile"]:
                logging.info("Uploaded by ID mapped user: %s!" % (user_profile_id,))
                user_profile_id = id_maps["user_profile"][user_profile_id]
            user_profile = get_user_profile_by_id(user_profile_id)
            key.set_metadata("user_profile_id", str(user_profile.id))

        key.set_metadata("orig_last_modified", record['last_modified'])
        key.set_metadata("realm_id", str(record['realm_id']))

        # Zulip exports will always have a content-type, but third-party exports might not.
        content_type = record.get("content_type", guess_type(record['s3_path'])[0])
        headers = {'Content-Type': content_type}

        key.set_contents_from_filename(os.path.join(import_dir, record['path']), headers=headers)

    if processing_avatars:
        # Ensure that we have medium-size avatar images for every
        # avatar.  TODO: This implementation is hacky, both in that it
        # does get_user_profile_by_id for each user, and in that it
        # might be better to require the export to just have these.
        upload_backend = S3UploadBackend()
        for record in records:
            if record['s3_path'].endswith('.original'):
                user_profile = get_user_profile_by_id(record['user_profile_id'])
                upload_backend.ensure_medium_avatar_image(user_profile=user_profile)
예제 #18
0
def import_uploads(import_dir: Path, processing_avatars: bool=False,
                   processing_emojis: bool=False) -> None:
    """Import uploaded files, avatars, or custom emoji from an export tree.

    Reads ``records.json`` from ``import_dir``, remaps realm and user IDs
    to their values on this server, and copies each file either into the
    appropriate S3 bucket (when ``LOCAL_UPLOADS_DIR`` is unset) or into
    the local uploads directory.

    Exactly one of ``processing_avatars`` / ``processing_emojis`` may be
    True; with both False, regular message attachments are imported.
    """
    if processing_avatars and processing_emojis:
        raise AssertionError("Cannot import avatars and emojis at the same time!")
    if processing_avatars:
        logging.info("Importing avatars")
    elif processing_emojis:
        logging.info("Importing emojis")
    else:
        logging.info("Importing uploaded files")

    records_filename = os.path.join(import_dir, "records.json")
    with open(records_filename) as records_file:
        records = ujson.loads(records_file.read())  # type: List[Dict[str, Any]]
    timestamp = datetime_to_timestamp(timezone_now())

    # Rewrite exported realm/user IDs to the IDs allocated on this server.
    re_map_foreign_keys_internal(records, 'records', 'realm_id', related_table="realm",
                                 id_field=True)
    if not processing_emojis:
        re_map_foreign_keys_internal(records, 'records', 'user_profile_id',
                                     related_table="user_profile", id_field=True)

    s3_uploads = settings.LOCAL_UPLOADS_DIR is None

    if s3_uploads:
        if processing_avatars or processing_emojis:
            bucket_name = settings.S3_AVATAR_BUCKET
        else:
            bucket_name = settings.S3_AUTH_UPLOADS_BUCKET
        conn = S3Connection(settings.S3_KEY, settings.S3_SECRET_KEY)
        bucket = conn.get_bucket(bucket_name, validate=True)

    for count, record in enumerate(records, 1):
        if count % 1000 == 0:
            # Lazy %-args so the message is only formatted when emitted.
            logging.info("Processed %s/%s uploads", count, len(records))

        if processing_avatars:
            # For avatars, we need to rehash the user ID with the
            # new server's avatar salt
            relative_path = user_avatar_path_from_ids(record['user_profile_id'], record['realm_id'])
            if record['s3_path'].endswith('.original'):
                relative_path += '.original'
            else:
                relative_path += '.png'
        elif processing_emojis:
            # For emojis we follow the function 'upload_emoji_image'
            relative_path = RealmEmoji.PATH_ID_TEMPLATE.format(
                realm_id=record['realm_id'],
                emoji_file_name=record['file_name'])
            record['last_modified'] = timestamp
        else:
            # Should be kept in sync with its equivalent in zerver/lib/uploads in the
            # function 'upload_message_file'
            relative_path = "/".join([
                str(record['realm_id']),
                random_name(18),
                sanitize_name(os.path.basename(record['path']))
            ])
            path_maps['attachment_path'][record['s3_path']] = relative_path

        if s3_uploads:
            key = Key(bucket)
            key.key = relative_path
            # Exported custom emoji from tools like Slack don't have
            # the data for what user uploaded them in `user_profile_id`.
            if not processing_emojis:
                user_profile_id = int(record['user_profile_id'])
                # Support email gateway bot and other cross-realm messages
                if user_profile_id in ID_MAP["user_profile"]:
                    logging.info("Uploaded by ID mapped user: %s!", user_profile_id)
                    user_profile_id = ID_MAP["user_profile"][user_profile_id]
                user_profile = get_user_profile_by_id(user_profile_id)
                key.set_metadata("user_profile_id", str(user_profile.id))

            if 'last_modified' in record:
                key.set_metadata("orig_last_modified", record['last_modified'])
            key.set_metadata("realm_id", str(record['realm_id']))

            # Zulip exports will always have a content-type, but third-party exports might not.
            content_type = record.get("content_type")
            if content_type is None:
                content_type = guess_type(record['s3_path'])[0]
            if content_type is None:
                # Fall back to a generic binary type rather than sending a
                # bogus 'Content-Type: None' header to S3 when the extension
                # is unrecognized.
                content_type = 'application/octet-stream'
            headers = {'Content-Type': content_type}  # type: Dict[str, Any]

            key.set_contents_from_filename(os.path.join(import_dir, record['path']), headers=headers)
        else:
            if processing_avatars or processing_emojis:
                file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars", relative_path)
            else:
                file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "files", relative_path)
            orig_file_path = os.path.join(import_dir, record['path'])
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            shutil.copy(orig_file_path, file_path)

    if processing_avatars:
        from zerver.lib.upload import upload_backend
        # Ensure that we have medium-size avatar images for every
        # avatar.  TODO: This implementation is hacky, both in that it
        # does get_user_profile_by_id for each user, and in that it
        # might be better to require the export to just have these.
        for record in records:
            if record['s3_path'].endswith('.original'):
                user_profile = get_user_profile_by_id(record['user_profile_id'])
                if settings.LOCAL_UPLOADS_DIR is not None:
                    avatar_path = user_avatar_path_from_ids(user_profile.id, record['realm_id'])
                    medium_file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars",
                                                    avatar_path) + '-medium.png'
                    if os.path.exists(medium_file_path):
                        # We remove the image here primarily to deal with
                        # issues when running the import script multiple
                        # times in development (where one might reuse the
                        # same realm ID from a previous iteration).
                        os.remove(medium_file_path)
                upload_backend.ensure_medium_avatar_image(user_profile=user_profile)