示例#1
0
    def make_collection(self, user, filename, info, rec_info=None):
        """Create a collection for ``user`` from imported archive metadata.

        ``self.prepare_coll_desc(filename, info, rec_info)`` is called first.
        ``info`` is then updated in place: ``info['id']`` ends up holding the
        name of the created collection and ``info['type']`` is set to
        ``'collection'``.

        :param User user: user the collection is created for
        :param str filename: WARC archive filename
        :param dict info: collection information (modified in place)
        :param rec_info: recording information
        :type rec_info: dict or None

        :returns: the newly created collection
        :rtype: Collection
        """
        self.prepare_coll_desc(filename, info, rec_info)

        is_public = info.get('public', False)
        in_public_index = info.get('public_index', False)

        # Request a name derived from the title.
        info['id'] = sanitize_title(info['title'])

        collection = user.create_collection(
            info['id'],
            title=info['title'],
            desc=info['desc'],
            public=is_public,
            public_index=in_public_index,
            allow_dupe=True,
        )

        # Read the name back from the collection: it may differ from the
        # requested one (presumably when allow_dupe de-duplicates -- confirm).
        info['id'] = collection.name
        info['type'] = 'collection'

        # Carry over the timestamps found in the metadata (None when absent).
        collection.data['_updated_at'] = info.get('updated_at')
        collection.data['_created_at'] = info.get('created_at')

        return collection
示例#2
0
    def make_collection(self, user, filename, info, rec_info=None):
        """Create a new collection owned by ``user`` from ``info``.

        Calls ``self.prepare_coll_desc(filename, info, rec_info)`` before the
        collection is created. As a side effect, ``info['id']`` is set to the
        created collection's name and ``info['type']`` to ``'collection'``.

        :param User user: user the collection is created for
        :param str filename: WARC archive filename
        :param dict info: collection information (modified in place)
        :param rec_info: recording information
        :type rec_info: dict or None

        :returns: the created collection
        :rtype: Collection
        """
        self.prepare_coll_desc(filename, info, rec_info)

        public_flag = info.get('public', False)
        public_index_flag = info.get('public_index', False)

        # The title-derived slug is the name asked for at creation time.
        info['id'] = sanitize_title(info['title'])
        new_coll = user.create_collection(info['id'],
                                          title=info['title'],
                                          desc=info['desc'],
                                          public=public_flag,
                                          public_index=public_index_flag,
                                          allow_dupe=True)

        # Replace the requested slug with the name actually assigned.
        info['id'] = new_coll.name
        info['type'] = 'collection'

        coll_data = new_coll.data
        coll_data['_updated_at'] = info.get('updated_at')
        coll_data['_created_at'] = info.get('created_at')

        return new_coll
示例#3
0
    def update_list_slug(self, new_title, bookmark_list):
        """Rename ``bookmark_list`` in ``self.list_names`` after a title change.

        :param str new_title: new title of the list
        :param bookmark_list: list whose ``'title'`` property is compared
            against ``new_title``

        :returns: ``False`` when the title is unchanged; otherwise whether
            ``self.list_names.rename`` returned something other than ``None``
        :rtype: bool
        """
        # Nothing to rename when the title did not change.
        if bookmark_list.get_prop('title') == new_title:
            return False

        candidate = sanitize_title(new_title)
        renamed = self.list_names.rename(bookmark_list, candidate)
        return renamed is not None
示例#4
0
    def get_list_slug(self, title):
        """Reserve a name in ``self.list_names`` derived from ``title``.

        :param str title: list title

        :returns: result of ``self.list_names.reserve_obj_name`` for the
            sanitized title, or ``None`` when ``title`` is empty or
            sanitizes to an empty value
        """
        slug = sanitize_title(title) if title else None
        if slug:
            return self.list_names.reserve_obj_name(slug, allow_dupe=True)

        return None
示例#5
0
    def _runner_init(self):
        """Populate ``os.environ`` with settings read from this instance.

        Sets the user config location, a freshly generated secret key, the
        record/storage roots under ``self.root_dir`` and the auto-login user.
        """
        # Config is addressed as a package resource (pkg:// URL).
        os.environ['WR_USER_CONFIG'] = 'pkg://webrecorder/config/standalone_recorder.yaml'
        # New random secret on every call: 75 random bytes, base32-encoded.
        os.environ['SECRET_KEY'] = base64.b32encode(os.urandom(75)).decode('utf-8')

        # The trailing '' component makes each path end with a separator.
        os.environ['RECORD_ROOT'] = os.path.join(self.root_dir, 'warcs', '')
        os.environ['STORAGE_ROOT'] = os.path.join(self.root_dir, 'storage', '')

        # NOTE(review): from the next line down to the ``except`` clause the
        # code appears redacted/truncated in this copy (masked credentials,
        # dangling keyword arguments, ``except`` without a visible ``try``)
        # and does not parse as written -- restore from the original source.
        os.environ['REDIS_BROWSER_URL'] = 'redis://*****:*****@localhost'.format(self.default_user),
              username=self.default_user,
              passwd='LocalUser1',
              role='admin',
              name=self.default_user)

        print('DEFAULT_USER='******'max_size', max_size)
        except Exception as e:
            print(e)

        # Export the default user's name as AUTO_LOGIN_USER.
        os.environ['AUTO_LOGIN_USER'] = self.default_user
    def _create_coll(self, user, coll_title, public=False):
        """Create a collection through the API and return its expected name.

        Posts ``coll_title`` and ``public`` to ``/api/v1/collections`` for
        ``user`` and asserts that the response echoes the ``public`` flag and
        reports ``public_index`` as false.

        :param user: user name placed in the request query string
        :param coll_title: title of the collection to create
        :param public: value sent as (and expected back for) ``public``

        :returns: ``sanitize_title(coll_title)``
        """
        payload = {'title': coll_title, 'public': public}

        coll_name = sanitize_title(coll_title)

        url = '/api/v1/collections?user={0}'.format(user)
        response = self.testapp.post_json(url, params=payload)
        created = response.json['collection']

        assert created['public'] == public
        assert created['public_index'] == False

        return coll_name
示例#7
0
    def update_list_slug(self, new_title, bookmark_list):
        """Rename a bookmark list's entry in ``self.list_names``.

        The rename is attempted only when ``new_title`` differs from the
        list's current ``'title'`` property.

        :param str new_title: new title of the list
        :param BookmarkList bookmark_list: list to rename

        :returns: ``False`` if the title is unchanged; otherwise ``True``
            when ``self.list_names.rename`` returns a non-``None`` value
        :rtype: bool
        """
        current_title = bookmark_list.get_prop('title')
        title_changed = not (current_title == new_title)
        if not title_changed:
            return False

        slug = self.list_names.rename(bookmark_list,
                                      sanitize_title(new_title))
        return slug is not None
示例#8
0
    def update_list_slug(self, new_title, bookmark_list):
        """Update the reserved name of ``bookmark_list`` for a new title.

        :param str new_title: new title of the list
        :param BookmarkList bookmark_list: list whose name is renamed

        :returns: ``False`` when ``new_title`` equals the list's current
            ``'title'`` property; otherwise whether the call to
            ``self.list_names.rename`` produced a non-``None`` result
        :rtype: bool
        """
        # Same title: leave the existing name untouched.
        if bookmark_list.get_prop('title') == new_title:
            return False

        sanitized = sanitize_title(new_title)
        return self.list_names.rename(bookmark_list, sanitized) is not None
示例#9
0
    def get_list_slug(self, title):
        """Reserve and return a name for a list with the given title.

        :param str title: list title

        :returns: value returned by ``self.list_names.reserve_obj_name`` for
            the sanitized title; ``None`` if ``title`` is empty or its
            sanitized form is empty
        :rtype: str or None
        """
        if title:
            slug = sanitize_title(title)
            if slug:
                return self.list_names.reserve_obj_name(slug, allow_dupe=True)

        return None
示例#10
0
    def get_list_slug(self, title):
        """Return a name reserved in ``self.list_names`` for ``title``.

        :param str title: list title

        :returns: result of ``reserve_obj_name`` called with the sanitized
            title and ``allow_dupe=True``, or ``None`` when there is no
            title or sanitizing it leaves nothing
        :rtype: str or None
        """
        candidate = sanitize_title(title) if title else None
        if not candidate:
            return None

        return self.list_names.reserve_obj_name(candidate, allow_dupe=True)
示例#11
0
    def get_move_temp_info(self, input_data):
        """Build the move-temp description requested in ``input_data``.

        :param dict input_data: request data; ``'moveTemp'`` enables the
            move and ``'toColl'`` carries the target collection title

        :returns: ``None`` when ``'moveTemp'`` is missing or falsy, otherwise
            a dict with ``from_user``, ``to_coll`` and ``to_title``
        :raises ValidationException: ``'invalid_coll_name'`` if the target
            title sanitizes to an empty value; ``'invalid_user_import'`` if
            the session user is not anonymous
        """
        if not input_data.get('moveTemp'):
            return None

        target_title = input_data.get('toColl', '')
        target_name = sanitize_title(target_title)
        if not target_name:
            raise ValidationException('invalid_coll_name')

        # Only an anonymous session user passes this check.
        session_user = self.access.session_user
        if not session_user.is_anon():
            raise ValidationException('invalid_user_import')

        move_info = {}
        move_info['from_user'] = session_user.name
        move_info['to_coll'] = target_name
        move_info['to_title'] = target_title
        return move_info
示例#12
0
    def get_move_temp_info(self, input_data):
        """Return source/target details for moving a temp collection.

        :param dict input_data: request data read for ``'moveTemp'`` and
            ``'toColl'``

        :returns: ``None`` if ``'moveTemp'`` is absent or falsy; otherwise a
            dict holding the session user's name (``from_user``), the
            sanitized target name (``to_coll``) and the raw target title
            (``to_title``)
        :raises ValidationException: with ``'invalid_coll_name'`` when the
            sanitized target name is empty, or ``'invalid_user_import'``
            when the session user is not anonymous
        """
        wants_move = input_data.get('moveTemp')
        if not wants_move:
            return None

        title = input_data.get('toColl', '')
        coll_name = sanitize_title(title)

        if not coll_name:
            raise ValidationException('invalid_coll_name')

        current_user = self.access.session_user
        if not current_user.is_anon():
            raise ValidationException('invalid_user_import')

        return dict(from_user=current_user.name,
                    to_coll=coll_name,
                    to_title=title)
示例#13
0
    def make_collection(self, user, filename, info, rec_info=None):
        """Create a collection for ``user`` described by ``info``.

        ``self.prepare_coll_desc(filename, info, rec_info)`` runs first.
        ``info`` is modified in place: ``'id'`` becomes the created
        collection's name and ``'type'`` becomes ``'collection'``.

        :param user: object providing ``create_collection``
        :param filename: passed through to ``prepare_coll_desc``
        :param dict info: collection information; ``'title'`` and ``'desc'``
            are read at creation time
        :param rec_info: passed through to ``prepare_coll_desc``

        :returns: the created collection
        """
        self.prepare_coll_desc(filename, info, rec_info)

        make_public = info.get('public', False)
        list_publicly = info.get('public_index', False)

        # Ask for a name based on the sanitized title.
        info['id'] = sanitize_title(info['title'])

        created = user.create_collection(
            info['id'],
            title=info['title'],
            desc=info['desc'],
            public=make_public,
            public_index=list_publicly,
            allow_dupe=True)

        # Store the name the collection actually received.
        info['id'] = created.name
        info['type'] = 'collection'

        created.data['_updated_at'] = info.get('updated_at')
        created.data['_created_at'] = info.get('created_at')

        return created
示例#14
0
    def migrate_collection(self, user, old_user, old_coll):
        """Migrate one collection from the old Redis store to ``user``.

        Reads the old collection info hash, creates the new collection,
        migrates each of its recordings via ``self.migrate_recording``,
        creates bookmarks from ``collection.pages_for_bookmarks`` when that
        attribute exists, and finally links recordings to their patch
        recordings.

        :param user: object providing ``create_collection``
        :param str old_user: user name used in the old Redis keys
        :param str old_coll: collection name used in the old Redis keys

        :returns: sum of the sizes reported by ``migrate_recording``, or
            ``0`` when the old info hash is missing or has no ``'id'``
        """
        key_parts = {'user': old_user, 'coll': old_coll}

        # Fetch the old collection info hash.
        info_key = 'c:{user}:{coll}:info'.format(**key_parts)
        old_info = self.old_redis.hgetall(info_key)

        if not (old_info and old_info.get('id')):
            print('  SKIPPING INVALID: ' + old_coll)
            return 0

        is_public = old_info.get('r:@public') == '1'
        collection = user.create_collection(old_coll,
                                            allow_dupe=False,
                                            title=old_info.get('title', ''),
                                            desc=old_info.get('desc', ''),
                                            public=is_public)

        print('  New Collection Created: ' + collection.my_id)

        created_at = old_info.get('created_at')
        if created_at:
            collection.set_prop('created_at', created_at, update_ts=False)
        else:
            print('  OBJ ERR: created_at missing')

        collection.bookmarks_list = None

        recs_key = 'c:{user}:{coll}:recs'.format(**key_parts)
        old_rec_names = self.old_redis.smembers(recs_key)

        # Patch recordings keyed by their old 'id' field.
        patches_by_id = {}
        total_size = 0

        for rec in old_rec_names:
            rec_base_key = 'r:{user}:{coll}:{rec}:'.format(rec=rec,
                                                           **key_parts)
            print('    Processing Recording: ' + rec)
            recording, rec_size = self.migrate_recording(collection, rec,
                                                         rec_base_key)
            total_size += rec_size

            # Remember patch recordings so they can be linked afterwards.
            if recording and recording.get_prop('rec_type') == 'patch':
                print('    Patch: yes')
                old_id = self.old_redis.hget(rec_base_key + 'info', 'id')
                patches_by_id[old_id] = recording

        collection.set_prop('size', total_size, update_ts=False)

        # Add bookmarks to the bookmarks list, oldest page first.
        if hasattr(collection, 'pages_for_bookmarks'):
            def page_time(page):
                return page.get('timestamp') or page.get('ts', '')

            for page in sorted(collection.pages_for_bookmarks,
                               key=page_time):
                collection.bookmarks_list.create_bookmark(page)

        # Link each source recording to its patch recording, if any.
        if patches_by_id:
            for recording in collection.get_recordings():
                title = recording.get_prop('title')
                if not title:
                    continue

                patch = patches_by_id.get('patch-of-' + sanitize_title(title))
                if patch:
                    print('Patch Mapped ({0}) {1} -> {2}'.format(
                        title, recording.my_id, patch.my_id))
                    recording.set_patch_recording(patch, update_ts=False)

        return total_size
示例#15
0
 def sanitize_title(self, title):
     """Return ``title`` passed through the module-level ``sanitize_title``."""
     sanitized = sanitize_title(title)
     return sanitized
示例#16
0
    def migrate_collection(self, user, old_user, old_coll):
        """Copy an old-format collection and its recordings to ``user``.

        Steps, in order: load the old info hash, create the new collection,
        migrate every recording with ``self.migrate_recording``, store the
        accumulated size, create bookmarks from
        ``collection.pages_for_bookmarks`` if present, then attach patch
        recordings to the recordings they belong to.

        :param user: object providing ``create_collection``
        :param str old_user: user name used in the old Redis keys
        :param str old_coll: collection name used in the old Redis keys

        :returns: total of the sizes returned by ``migrate_recording``;
            ``0`` if the old info hash is empty or lacks an ``'id'``
        """
        # Load the old collection info.
        old_data = self.old_redis.hgetall(
            'c:{user}:{coll}:info'.format(user=old_user, coll=old_coll))

        has_id = bool(old_data) and bool(old_data.get('id'))
        if not has_id:
            print('  SKIPPING INVALID: ' + old_coll)
            return 0

        collection = user.create_collection(
            old_coll,
            allow_dupe=False,
            title=old_data.get('title', ''),
            desc=old_data.get('desc', ''),
            public=old_data.get('r:@public') == '1',
        )

        print('  New Collection Created: ' + collection.my_id)

        old_created_at = old_data.get('created_at')
        if not old_created_at:
            print('  OBJ ERR: created_at missing')
        else:
            collection.set_prop('created_at', old_created_at, update_ts=False)

        collection.bookmarks_list = None

        rec_names = self.old_redis.smembers(
            'c:{user}:{coll}:recs'.format(user=old_user, coll=old_coll))

        patch_lookup = {}
        size_sum = 0

        for rec in rec_names:
            base_key = 'r:{user}:{coll}:{rec}:'.format(
                user=old_user, coll=old_coll, rec=rec)
            print('    Processing Recording: ' + rec)

            migrated = self.migrate_recording(collection, rec, base_key)
            recording, size = migrated
            size_sum += size

            # Keep track of patch recordings, keyed by their old id.
            if recording and recording.get_prop('rec_type') == 'patch':
                print('    Patch: yes')
                patch_id = self.old_redis.hget(base_key + 'info', 'id')
                patch_lookup[patch_id] = recording

        collection.set_prop('size', size_sum, update_ts=False)

        # Create bookmarks from the collected pages, oldest to newest.
        if hasattr(collection, 'pages_for_bookmarks'):
            ordered_pages = sorted(
                collection.pages_for_bookmarks,
                key=lambda pg: pg.get('timestamp') or pg.get('ts', ''))

            for page in ordered_pages:
                collection.bookmarks_list.create_bookmark(page)

        # Without patch recordings there is nothing left to map.
        if not patch_lookup:
            return size_sum

        for recording in collection.get_recordings():
            rec_title = recording.get_prop('title')
            if not rec_title:
                continue

            lookup_key = 'patch-of-' + sanitize_title(rec_title)
            matching_patch = patch_lookup.get(lookup_key)
            if not matching_patch:
                continue

            print('Patch Mapped ({0}) {1} -> {2}'.format(
                rec_title, recording.my_id, matching_patch.my_id))
            recording.set_patch_recording(matching_patch, update_ts=False)

        return size_sum