def test_drupal_serializer(video_record_metadata, deposit_metadata):
    """Check the Drupal serialization of a video record.

    The serialized entry is compared with the expected values twice: once
    with ``publication_date`` present in the metadata, and once after it has
    been removed, in which case ``creation_date`` is expected to be empty.
    """
    report_number = 'RN-01'
    duration = '00:01:00.140'

    def serialized_entry(wanted):
        """Serialize the metadata and keep only the keys found in *wanted*."""
        entry = VideoDrupal(video_record_metadata).format()[
            'entries'][0]['entry']
        return {key: entry[key] for key in entry if key in wanted}

    # Complete the fixture with the deposit fields and the values under test.
    video_record_metadata.update(deposit_metadata)
    contributors = [
        {'name': name, 'role': role}
        for name, role in (
            ('paperone', 'Director'),
            ('topolino', 'Music by'),
            ('nonna papera', 'Producer'),
            ('pluto', 'Director'),
            ('zio paperino', 'Producer'),
        )
    ]
    video_record_metadata.update({
        'report_number': [report_number],
        '$schema': Video.get_record_schema(),
        'duration': duration,
        'contributors': contributors,
    })

    expected = {
        'caption_en': 'in tempor reprehenderit enim eiusmod',
        'caption_fr': 'france caption',
        'copyright_date': '2017',
        'copyright_holder': 'CERN',
        'creation_date': '2017-03-02',
        'directors': 'paperone, pluto',
        'entry_date': '2017-09-25',
        'id': report_number,
        'keywords': 'keyword1, keyword2',
        'license_body': 'GPLv2',
        'license_url': 'http://license.cern.ch',
        'producer': 'nonna papera, zio paperino',
        'record_id': '1',
        'title_en': 'My english title',
        'title_fr': 'My french title',
        'type': 'video',
        'video_length': duration,
    }

    # Proper publication date
    assert serialized_entry(expected) == expected

    # Empty publication date
    del video_record_metadata['publication_date']
    expected['creation_date'] = ''
    assert serialized_entry(expected) == expected
def test_migrate_record(frames_required, api_app, location, datadir, es,
                        users):
    """Test the migration of a project dump and of a video dump.

    The project is loaded first, then the video (with DataCite registration,
    frame creation, gif creation and file streaming patched out).  The test
    then checks the resulting records, their deposits, buckets, files and
    persistent identifiers, and finally edits and re-publishes the video.

    NOTE(review): SOURCE contains two top-level functions named
    ``test_migrate_record``.  If both live in the same module, the later
    definition replaces the earlier one and only one of them is collected.
    """
    # [[ migrate the project ]]
    data = load_json(datadir, 'cds_records_demo_1_project.json')
    dump = CDSRecordDump(data=data[0])
    project = CDSRecordDumpLoader.create(dump=dump)
    p_id = project.id

    assert project['$schema'] == Project.get_record_schema()
    assert project['publication_date'] == '2016-01-05'
    assert 'license' not in project
    assert 'copyright' not in project
    assert project['_cds'] == {
        "state": {
            "file_transcode": "SUCCESS",
            "file_video_extract_frames": "SUCCESS",
            "file_video_metadata_extraction": "SUCCESS"
        },
        'modified_by': users[0],
    }

    # check project deposit (the only 'depid' PID existing at this point)
    deposit_project_uuid = PersistentIdentifier.query.filter_by(
        pid_type='depid', object_type='rec').one().object_uuid
    deposit_project = Record.get_record(deposit_project_uuid)
    assert Project._schema in deposit_project['$schema']
    assert project.revision_id == deposit_project[
        '_deposit']['pid']['revision_id']
    assert deposit_project['_deposit']['created_by'] == 1
    assert deposit_project['_deposit']['owners'] == [1]
    assert deposit_project['_files'] == []

    # [[ migrate the video ]]
    data = load_json(datadir, 'cds_records_demo_1_video.json')
    dump = CDSRecordDump(data=data[0])
    db.session.commit()

    def check_symlinks(video):
        """Check that the single listed file has its link on disk."""
        symlinks_creator = SymlinksCreator()
        files = list(symlinks_creator._get_list_files(record=video))
        assert len(files) == 1
        for file_ in files:
            path = symlinks_creator._build_link_path(
                symlinks_creator._symlinks_location, video, file_['key'])
            # lexists: the link itself must exist, even if it is dangling
            assert os.path.lexists(path)

    def check_gif(video, mock_gif):
        """Check the keyword arguments of the first gif creation call."""
        # called only once for deposit
        # (a mock call unpacks as name, positional args, keyword args)
        (_, _, mock_args) = mock_gif.mock_calls[0]
        # check gif record
        video = CDSRecord(dict(video), video.model)
        # check gif deposit
        deposit = deposit_video_resolver(video['_deposit']['id'])
        master_video = CDSVideosFilesIterator.get_master_video_file(deposit)
        assert mock_args['master_id'] == master_video['version_id']
        assert str(deposit.files.bucket.id) == mock_args['bucket']
        assert len(mock_args['frames']) == 10
        assert 'output_dir' in mock_args

    migration_streams = get_migration_streams(datadir=datadir)
    with mock.patch.object(DataCiteProvider, 'register'), \
            mock.patch.object(CDSRecordDumpLoader, '_create_frame',
                              side_effect=get_frames), \
            mock.patch.object(CDSRecordDumpLoader, '_get_minimum_frames',
                              return_value=frames_required) as mock_frames, \
            mock.patch.object(
                ExtractFramesTask, '_create_gif') as mock_gif, \
            mock.patch.object(
                CDSRecordDumpLoader, '_get_migration_file_stream_and_size',
                side_effect=migration_streams), \
            mock.patch.object(CDSRecordDumpLoader, '_clean_file_list'):
        video = CDSRecordDumpLoader.create(dump=dump)
        assert mock_frames.called is True
    db.session.add(video.model)
    video_id = video.id

    # check smil file
    smil_obj = ObjectVersion.query.filter_by(
        key='CERN-MOVIE-2012-193-001.smil', is_head=True).one()
    storage = smil_obj.file.storage()
    assert '<video src' in storage.open().read().decode('utf-8')
    # check video symlinks
    check_symlinks(video)
    # check gif
    check_gif(video, mock_gif)

    # check project
    project = Record.get_record(p_id)
    assert project['videos'] == [
        {'$ref': 'https://cds.cern.ch/api/record/1495143'}
    ]
    assert video['$schema'] == Video.get_record_schema()
    assert video['date'] == '2012-11-21'  # metadata data
    assert video['publication_date'] == '2017-07-13'  # creation date (DB)
    assert video['_project_id'] == '2093596'
    assert video['license'] == [{
        'license': 'CERN',
        'url': 'http://copyright.web.cern.ch',
    }]
    assert video['copyright'] == {
        'holder': 'CERN',
        'year': '2012',
        'url': 'http://copyright.web.cern.ch',
    }
    assert video['description'] == ''
    assert 'doi' in video
    assert video['_cds']['state'] == {
        "file_transcode": "SUCCESS",
        "file_video_extract_frames": "SUCCESS",
        "file_video_metadata_extraction": "SUCCESS"
    }
    assert 'extracted_metadata' in video['_cds']

    def check_files(video):
        """Check every dumped file of the bucket and the extracted metadata."""
        bucket = CDSRecordDumpLoader._get_bucket(record=video)
        files = [dump_object(obj)
                 for obj in ObjectVersion.get_by_bucket(bucket=bucket)]
        for file_ in files:
            assert as_bucket(file_['bucket_id']) is not None
            assert 'checksum' in file_
            assert 'content_type' in file_
            assert 'context_type' in file_
            # Fetch the row: a bare query object is never None, so comparing
            # the query itself with None would always pass.
            assert FileInstance.query.filter_by(
                id=file_['file_id']).first() is not None
            assert 'key' in file_
            assert 'links' in file_
            assert 'media_type' in file_
            assert 'tags' in file_

        # check extracted metadata
        master_video = CDSVideosFilesIterator.get_master_video_file(video)
        assert any(key in master_video['tags']
                   for key in ExtractMetadataTask._all_keys)
        assert any(key in video['_cds']['extracted_metadata']
                   for key in ExtractMetadataTask._all_keys)

    def check_buckets(record, deposit):
        """Compare the files in the record bucket and the deposit bucket."""
        def get(key, record):
            """Return the *key* value of every file dumped from the bucket."""
            bucket = CDSRecordDumpLoader._get_bucket(record=record)
            files = [dump_object(obj)
                     for obj in ObjectVersion.get_by_bucket(bucket=bucket)]
            return [file_[key] for file_ in files]

        def check(record, deposit, file_key, different=None):
            """Check how many record values are missing from the deposit."""
            values_record = set(get(file_key, record))
            values_deposit = set(get(file_key, deposit))
            difference = len(values_record - values_deposit)
            assert different == difference

        def check_tag_master(record):
            """Check that every 'master' tag points to the master version."""
            bucket = CDSRecordDumpLoader._get_bucket(record=record)
            master = CDSVideosFilesIterator.get_master_video_file(record)
            files = [dump_object(obj)
                     for obj in ObjectVersion.get_by_bucket(bucket=bucket)
                     if obj.get_tags().get('master')]
            assert all(file_['tags']['master'] == master['version_id']
                       for file_ in files)

        # 1 bucket record != 1 bucket deposit
        check(record, deposit, 'bucket_id', 1)
        # all file_id are the same except the smil file (only in record)
        check(record, deposit, 'file_id', 1)
        check(record, deposit, 'key', 1)
        # 18 object_version record != 17 object_version deposit
        check(record, deposit, 'version_id', 18)
        # check tag 'master' where is pointing
        check_tag_master(record)
        check_tag_master(deposit)

    def check_first_level_files(record):
        """Check the master file, its subformats and its frames.

        NOTE(review): the file list is read from ``deposit_video`` in the
        enclosing scope, not from ``record``; only the duration comes from
        ``record``.  Confirm whether ``record['_files']`` was intended.
        """
        # The single-element unpacking fails unless there is exactly one
        # master file.
        [master] = [file_ for file_ in deposit_video['_files']
                    if file_['context_type'] == 'master']
        assert len(master['subformat']) == 5
        assert len(master['frame']) == 10
        # TODO assert len(master['playlist']) == ??
        assert len([file_ for file_ in deposit_video['_files']
                    if file_['context_type'] == 'master']) == 1
        duration = float(record['_cds']['extracted_metadata']['duration'])
        for frame in master['frame']:
            assert float(frame['tags']['timestamp']) < duration
            assert float(frame['tags']['timestamp']) > 0
        # check tag 'preset_quality'
        pqs = [form['tags']['preset_quality'] for form in master['subformat']]
        assert sorted(pqs) == sorted(['1080p', '240p', '360p', '480p', '720p'])
        # check tag 'display_aspect_ratio'
        dar = {form['tags']['display_aspect_ratio']
               for form in master['subformat']}
        assert dar == {'16:9'}

    def check_pids(record):
        """Check pids."""
        assert record['report_number'][0] == 'CERN-VIDEO-2012-193-001'
        assert PersistentIdentifier.query.filter_by(
            pid_value='CERN-VIDEO-2012-193-001').count() == 1
        assert PersistentIdentifier.query.filter_by(
            pid_value='CERN-MOVIE-2012-193-001').count() == 1

    db.session.commit()

    # check video deposit (the 'depid' PID that is not the project's one)
    deposit_video_uuid = PersistentIdentifier.query.filter(
        PersistentIdentifier.pid_type == 'depid',
        PersistentIdentifier.object_uuid != str(deposit_project_uuid),
        PersistentIdentifier.object_type == 'rec'
    ).one().object_uuid
    deposit_video = Video.get_record(str(deposit_video_uuid))
    assert Video._schema in deposit_video['$schema']
    video = Record.get_record(video_id)
    assert video.revision_id == deposit_video[
        '_deposit']['pid']['revision_id']
    assert deposit_video['_deposit']['created_by'] == users[0]
    assert deposit_video['_deposit']['owners'] == [users[0]]
    assert deposit_video['_project_id'] == '2093596'
    assert len(video['_files']) == 2
    assert len(deposit_video['_files']) == 2
    check_files(video)
    check_files(deposit_video)
    check_buckets(video, deposit_video)
    check_first_level_files(video)
    check_first_level_files(deposit_video)
    check_pids(video)

    # try to edit video
    deposit_video = deposit_video_resolver(deposit_video['_deposit']['id'])
    deposit_video = deposit_video.edit()

    # try to edit project
    deposit_project = deposit_project_resolver(
        deposit_project['_deposit']['id'])
    deposit_project = deposit_project.edit()

    # publish the edited video as the first test user
    login_user(User.query.filter_by(id=users[0]).first())
    deposit_video['title']['title'] = 'test'
    deposit_video = deposit_video.publish()
    _, record_video = deposit_video.fetch_published()
    assert record_video['title']['title'] == 'test'
def test_video_record_schema(app, db, api_project):
    """Check that a video reports the Video record schema."""
    # api_project yields the project followed by its two videos.
    _project, first_video, _second_video = api_project
    reported_schema = first_video.record_schema
    assert reported_schema == Video.get_record_schema()
def test_migrate_record(app, location, datadir, es):
    """Test the migration of a project dump and of a video dump.

    The project is loaded first, then the video (with DataCite registration
    patched out and the migrated file stream replaced by ``test.mp4``).  The
    test then checks the resulting records, their deposits, buckets and
    files, and finally edits and re-publishes the video.

    NOTE(review): SOURCE contains two top-level functions named
    ``test_migrate_record``.  If both live in the same module, the later
    definition replaces the earlier one and only one of them is collected.
    """
    # create the project
    data = load_json(datadir, 'cds_records_demo_1_project.json')
    dump = CDSRecordDump(data=data[0])
    project = CDSRecordDumpLoader.create(dump=dump)
    p_id = project.id

    date = '2015-11-13'
    assert project['$schema'] == Project.get_record_schema()
    assert project['date'] == date
    assert project['publication_date'] == date
    assert 'license' not in project
    assert 'copyright' not in project
    assert project['_cds'] == {
        "state": {
            "file_transcode": "SUCCESS",
            "file_video_extract_frames": "SUCCESS",
            "file_video_metadata_extraction": "SUCCESS"
        },
        'modified_by': None,
    }

    # check project deposit (the only 'depid' PID existing at this point)
    deposit_project_uuid = PersistentIdentifier.query.filter_by(
        pid_type='depid', object_type='rec').one().object_uuid
    deposit_project = Record.get_record(deposit_project_uuid)
    assert Project._schema in deposit_project['$schema']
    assert project.revision_id == deposit_project[
        '_deposit']['pid']['revision_id']
    assert deposit_project['_deposit']['created_by'] == -1
    assert deposit_project['_deposit']['owners'] == [-1]
    assert deposit_project['_files'] == []

    # create the video
    data = load_json(datadir, 'cds_records_demo_1_video.json')
    dump = CDSRecordDump(data=data[0])

    # The stream is opened once and handed to every patched call; opening it
    # in the ``with`` statement guarantees the file handle is closed.
    # DataCite registration is patched out; whether it is called is not
    # asserted.
    with open(join(datadir, 'test.mp4'), 'rb') as video_stream, \
            mock.patch.object(DataCiteProvider, 'register'), \
            mock.patch.object(
                CDSRecordDumpLoader, '_get_migration_file_stream',
                return_value=video_stream):
        video = CDSRecordDumpLoader.create(dump=dump)

    project = Record.get_record(p_id)
    assert project['videos'] == [
        {'$ref': 'https://cds.cern.ch/api/record/1495143'}
    ]
    assert video['$schema'] == Video.get_record_schema()
    date = '2012-11-20'
    assert video['date'] == date
    assert video['publication_date'] == date
    assert video['_project_id'] == '2093596'
    assert video['license'] == [{
        'license': 'CERN',
        'url': 'http://copyright.web.cern.ch',
    }]
    assert video['copyright'] == {
        'holder': 'CERN',
        'year': '2012',
        'url': 'http://copyright.web.cern.ch',
    }
    assert video['description'] == ''
    assert 'doi' in video
    assert video['_cds']['state'] == {
        "file_transcode": "SUCCESS",
        "file_video_extract_frames": "SUCCESS",
        "file_video_metadata_extraction": "SUCCESS"
    }
    assert 'extracted_metadata' in video['_cds']

    def check_files(video):
        """Check every dumped file of the bucket and the extracted metadata."""
        bucket = CDSRecordDumpLoader._get_bucket(record=video)
        files = [dump_object(obj)
                 for obj in ObjectVersion.get_by_bucket(bucket=bucket)]
        for file_ in files:
            assert as_bucket(file_['bucket_id']) is not None
            assert 'checksum' in file_
            assert 'content_type' in file_
            assert 'context_type' in file_
            # Fetch the row: a bare query object is never None, so comparing
            # the query itself with None would always pass.
            assert FileInstance.query.filter_by(
                id=file_['file_id']).first() is not None
            assert 'key' in file_
            assert 'links' in file_
            assert 'media_type' in file_
            assert 'tags' in file_

        # check extracted metadata
        master_video = CDSVideosFilesIterator.get_master_video_file(video)
        assert any(key in master_video['tags']
                   for key in ExtractMetadataTask._all_keys)
        assert any(key in video['_cds']['extracted_metadata']
                   for key in ExtractMetadataTask._all_keys)

    def check_buckets(record, deposit):
        """Compare the files in the record bucket and the deposit bucket."""
        def get(key, record):
            """Return the *key* value of every file dumped from the bucket."""
            bucket = CDSRecordDumpLoader._get_bucket(record=record)
            files = [dump_object(obj)
                     for obj in ObjectVersion.get_by_bucket(bucket=bucket)]
            return [file_[key] for file_ in files]

        def check(record, deposit, file_key, different=None):
            """Check how many record values are missing from the deposit."""
            values_record = set(get(file_key, record))
            values_deposit = set(get(file_key, deposit))
            difference = len(values_record - values_deposit)
            assert different == difference

        def check_tag_master(record):
            """Check that every 'master' tag points to the master version."""
            bucket = CDSRecordDumpLoader._get_bucket(record=record)
            master = CDSVideosFilesIterator.get_master_video_file(record)
            files = [dump_object(obj)
                     for obj in ObjectVersion.get_by_bucket(bucket=bucket)
                     if obj.get_tags().get('master')]
            assert all(file_['tags']['master'] == master['version_id']
                       for file_ in files)

        # 1 bucket record != 1 bucket deposit
        check(record, deposit, 'bucket_id', 1)
        # all file_id are the same except the smil file (only in record)
        check(record, deposit, 'file_id', 1)
        check(record, deposit, 'key', 1)
        # 18 object_version record != 17 object_version deposit
        check(record, deposit, 'version_id', 18)
        # check tag 'master' where is pointing
        check_tag_master(record)
        check_tag_master(deposit)

    def check_first_level_files(record):
        """Check the master file, its subformats and its frames.

        NOTE(review): the file list is read from ``deposit_video`` in the
        enclosing scope, not from ``record``; only the duration comes from
        ``record``.  Confirm whether ``record['_files']`` was intended.
        """
        # The single-element unpacking fails unless there is exactly one
        # master file.
        [master] = [file_ for file_ in deposit_video['_files']
                    if file_['context_type'] == 'master']
        assert len(master['subformat']) == 5
        assert len(master['frame']) == 10
        # TODO assert len(master['playlist']) == ??
        assert len([file_ for file_ in deposit_video['_files']
                    if file_['context_type'] == 'master']) == 1
        duration = float(record['_cds']['extracted_metadata']['duration'])
        for frame in master['frame']:
            assert float(frame['tags']['timestamp']) < duration
            assert float(frame['tags']['timestamp']) > 0

    # check video deposit (the 'depid' PID that is not the project's one)
    deposit_video_uuid = PersistentIdentifier.query.filter(
        PersistentIdentifier.pid_type == 'depid',
        PersistentIdentifier.object_uuid != str(deposit_project_uuid),
        PersistentIdentifier.object_type == 'rec'
    ).one().object_uuid
    deposit_video = Video.get_record(str(deposit_video_uuid))
    assert Video._schema in deposit_video['$schema']
    assert video.revision_id == deposit_video[
        '_deposit']['pid']['revision_id']
    assert deposit_video['_deposit']['created_by'] == -1
    assert deposit_video['_deposit']['owners'] == [-1]

    assert len(video['_files']) == 2
    assert len(deposit_video['_files']) == 2
    check_files(video)
    check_files(deposit_video)
    check_buckets(video, deposit_video)
    check_first_level_files(video)
    check_first_level_files(deposit_video)

    # try to edit video
    deposit_video = deposit_video_resolver(deposit_video['_deposit']['id'])
    deposit_video = deposit_video.edit()

    # try to edit project
    deposit_project = deposit_project_resolver(
        deposit_project['_deposit']['id'])
    deposit_project = deposit_project.edit()

    # try to publish again the video
    deposit_video['title']['title'] = 'test'
    deposit_video = deposit_video.publish()
    _, record_video = deposit_video.fetch_published()
    assert record_video['title']['title'] == 'test'