Пример #1
0
    def dump(self):
        """Translate the MARCXML held in ``self.data`` into JSON.

        Returns:
            A ``(timestamp, record)`` tuple: the naive UTC time taken at the
            start of the call and the output of ``self.dojson_model.do``.

        Raises:
            LossyConversion: if ``self.dojson_model.missing`` reports
                anything for the parsed record (logged, then re-raised).
            Exception: any other failure inside the conversion is logged
                and re-raised unchanged.
        """
        timestamp = datetime.datetime.utcnow()

        # All three exception types share one handler.
        handlers = dict.fromkeys(
            (UnexpectedValue, MissingRequiredField, ManualImportRequired),
            importer_exception_handler,
        )

        record_blob = create_record(self.data)

        try:
            # MARCXML -> JSON fields translation
            converted = self.dojson_model.do(
                record_blob, exception_handlers=handlers)
            # check for missing rules
            unmatched = self.dojson_model.missing(record_blob)
            if unmatched:
                raise LossyConversion(missing=unmatched)
        except LossyConversion as exc:
            current_app.logger.error("MIGRATION RULE MISSING {0} - {1}".format(
                exc.missing, record_blob))
            raise exc
        except Exception as exc:
            current_app.logger.error(
                "Impossible to convert to JSON {0} - {1}".format(
                    exc, record_blob))
            raise exc
        else:
            return timestamp, converted
Пример #2
0
    def _prepare_revision(self, data):
        """Return the ``(timestamp, record)`` pair for one revision.

        The timestamp is parsed from ``data["modification_datetime"]``. When
        ``self.source_type`` is ``"marcxml"`` the record is the dojson
        translation of ``data["marcxml"]``; otherwise ``data["json"]`` is
        returned as is.

        Raises:
            LossyConversion: if ``self.dojson_model.missing`` reports
                anything for the parsed record (logged, then re-raised).
            Exception: any other conversion failure is logged and re-raised.
        """
        modified_at = arrow.get(data["modification_datetime"]).datetime

        # Non-MARCXML sources already carry JSON; nothing to convert.
        if self.source_type != "marcxml":
            return modified_at, data["json"]

        # All three exception types share one handler.
        handlers = dict.fromkeys(
            (UnexpectedValue, MissingRequiredField, ManualImportRequired),
            migration_exception_handler,
        )

        record_blob = create_record(data["marcxml"])
        try:
            converted = self.dojson_model.do(
                record_blob, exception_handlers=handlers)
            unmatched = self.dojson_model.missing(record_blob)
            if unmatched:
                raise LossyConversion(missing=unmatched)
        except LossyConversion as exc:
            current_app.logger.error(
                "MIGRATION RULE MISSING {0} - {1}".format(
                    exc.missing, record_blob))
            raise exc
        except Exception as exc:
            current_app.logger.error(
                "Impossible to convert to JSON {0} - {1}".format(
                    exc, record_blob))
            raise exc
        else:
            return modified_at, converted
Пример #3
0
    def _prepare_final_revision(self, data):
        """Return the ``(timestamp, record)`` pair for the final revision.

        The record comes from ``data['json']`` or, for a ``'marcxml'``
        source, from the dojson translation of ``data['marcxml']``. In both
        cases ``update_access`` is applied to it with
        ``self.collection_access`` before it is returned.

        Raises:
            LossyConversion: if ``self.dojson_model.missing`` reports
                anything for the parsed record (re-raised without logging).
            Exception: any other failure on the MARCXML path is logged and
                re-raised.
        """
        modified_at = arrow.get(data['modification_datetime']).datetime

        if self.source_type != 'marcxml':
            record_json = data['json']
            # Calculate the _access key
            update_access(record_json, self.collection_access)
            return modified_at, record_json

        # All three exception types share one handler.
        handlers = dict.fromkeys(
            (UnexpectedValue, MissingRequiredField, ManualMigrationRequired),
            migration_exception_handler,
        )

        record_blob = create_record(data['marcxml'])
        try:
            record_json = self.dojson_model.do(
                record_blob, exception_handlers=handlers)
            unmatched = self.dojson_model.missing(record_blob)
            if unmatched:
                raise LossyConversion(missing=unmatched)
            update_access(record_json, self.collection_access)
        except LossyConversion as exc:
            # Passed through untouched so the generic handler below
            # does not log it.
            raise exc
        except Exception as exc:
            current_app.logger.error(
                'Impossible to convert to JSON {0} - {1}'.format(
                    exc, record_blob))
            raise exc
        else:
            return modified_at, record_json
Пример #4
0
def check_transformation(marcxml_body, json_body):
    """Convert a MARCXML snippet and compare it with the expected JSON.

    ``marcxml_body`` is substituted into the externally defined ``marcxml``
    template, parsed, and run through ``model`` with
    ``ignore_missing=False``. The result must equal the default
    ``_migration`` block updated with ``json_body``.

    Side effect: ``model._default_fields`` is overwritten.
    """

    def migration_defaults():
        # A fresh dict on every call, so the model defaults and the
        # expectation are separate objects.
        return {
            "_migration": {
                "is_multipart": False,
                "has_related": False,
                "related": [],
                "record_type": "journal",
                "volumes": [],
            }
        }

    parsed = create_record(marcxml.format(marcxml_body))
    model._default_fields = migration_defaults()
    converted = model.do(parsed, ignore_missing=False)

    wanted = migration_defaults()
    wanted.update(**json_body)
    assert converted == wanted
Пример #5
0
def check_transformation(marcxml_body, json_body, model=None):
    """Convert a MARCXML snippet with ``model`` and compare with ``json_body``.

    ``marcxml_body`` is substituted into the externally defined ``marcxml``
    template, parsed, and run through ``model.do`` with
    ``ignore_missing=False``; the result must equal ``json_body``.

    ``model`` has to be supplied: with the default ``None`` the call to
    ``model.do`` raises ``AttributeError``.
    """
    parsed = create_record(marcxml.format(marcxml_body))
    converted = model.do(parsed, ignore_missing=False)
    wanted = dict(**json_body)
    assert converted == wanted
Пример #6
0
def check_transformation(marcxml_body, json_body):
    """Convert a MARCXML snippet and compare it with the expected JSON.

    The expectation is a ``_migration`` block copied from
    ``get_helper_dict()`` and updated with ``json_body``.

    Side effect: ``model._default_fields`` is overwritten.
    """

    def migration_defaults():
        # Copy the helper dict so each caller gets its own top-level mapping.
        return {"_migration": {**get_helper_dict()}}

    parsed = create_record(marcxml.format(marcxml_body))
    model._default_fields = migration_defaults()

    converted = model.do(parsed, ignore_missing=False)
    wanted = migration_defaults()
    wanted.update(**json_body)
    assert converted == wanted
Пример #7
0
 def check_transformation(marcxml_body, json_body):
     """Convert a MARCXML snippet and compare it with the expected JSON.

     The expectation is the project ``$schema`` entry updated with
     ``json_body``.
     """
     parsed = create_record(marcxml.format(marcxml_body))
     converted = model.do(parsed)
     schema_url = ('https://cds.cern.ch/schemas/'
                   'records/videos/project/project-v1.0.0.json')
     wanted = dict({'$schema': schema_url}, **json_body)
     assert converted == wanted
Пример #8
0
def test_base_model(app):
    """Check the base model against the ``fixtures/base.xml`` record."""
    fixture_path = os.path.join('fixtures', 'base.xml')
    fixture_xml = pkg_resources.resource_string(__name__, fixture_path)

    with app.app_context():
        parsed = create_record(fixture_xml)
        # Before conversion these three tags are reported as missing.
        assert model.missing(parsed) == {'001', '003', '005'}

        converted = model.do(parsed)
        assert converted['recid'] == 1495143
        assert converted['agency_code'] == 'SzGeCERN'
        # After do() nothing is reported as missing any more.
        assert not model.missing(parsed)
Пример #9
0
    def _prepare_intermediate_revision(self, data):
        """Build the ``(timestamp, payload)`` pair for an intermediate revision.

        For a ``'marcxml'`` source the parsed MARC record is returned as is.
        For any other source ``data['json']`` is returned after its
        ``_access`` key has been overwritten in place with the restricted
        read/update lists below.
        """
        revision_time = arrow.get(data['modification_datetime']).datetime

        if self.source_type == 'marcxml':
            return revision_time, create_record(data['marcxml'])

        payload = data['json']

        # MARC21 versions of the record are only accessible to admins
        # NOTE(review): the restriction is applied on the JSON path only;
        # the marcxml path has already returned above. Confirm this is
        # intended.
        payload['_access'] = dict(
            read=['*****@*****.**'],
            update=['*****@*****.**'],
        )

        return revision_time, payload
Пример #10
0
    def _prepare_final_revision(self, data):
        """Return the ``(timestamp, record)`` pair for the final revision.

        The record is ``data['json']`` or, for a ``'marcxml'`` source, the
        dojson translation of ``data['marcxml']``. ``update_access`` is
        applied to it with ``self.collection_access`` before returning.

        Raises:
            RuntimeError: if ``self.dojson_model.missing`` reports anything
                for the converted record.
            Exception: a failure inside ``self.dojson_model.do`` is logged
                and re-raised unchanged.
        """
        modified_at = arrow.get(data['modification_datetime']).datetime

        if self.source_type != 'marcxml':
            record_json = data['json']
        else:
            record_blob = create_record(data['marcxml'])
            try:
                record_json = self.dojson_model.do(record_blob)
            except Exception as exc:
                current_app.logger.error(
                    'Impossible to convert to JSON {0} - {1}'.format(
                        exc, record_blob))
                raise
            unmatched = self.dojson_model.missing(
                record_blob, _json=record_json)
            if unmatched:
                raise RuntimeError(
                    'Lossy conversion: {0}'.format(unmatched))

        # Calculate the _access key
        update_access(record_json, self.collection_access)

        return (modified_at, record_json)
Пример #11
0
def test_required_fields(app):
    """Convert ``videos_project.xml`` and check the resulting project record.

    After the equality check, the fields normally filled in by post-process
    tasks are added and the record is validated against its own ``$schema``.
    """
    fixture_xml = load_fixture_file('videos_project.xml')

    with app.app_context():
        parsed = create_record(fixture_xml)
        record = model.do(parsed)

        producer = {'name': 'CERN Video Productions', 'role': 'Producer'}
        director = {'name': 'CERN Video Productions', 'role': 'Director'}
        expected = {
            '$schema': 'https://cds.cern.ch/schemas/'
                       'records/videos/project/project-v1.0.0.json',
            '_access': {'update': ['*****@*****.**']},
            'category': 'CERN',
            'contributors': [producer, director],
            'keywords': [
                {'name': 'Higgs', 'source': 'CERN'},
                {'name': 'anniversary', 'source': 'CERN'},
            ],
            'recid': 2272969,
            'report_number': ['CERN-MOVIE-2017-023'],
            'title': {'title': 'Higgs anniversary 5Y'},
            'type': 'MOVIE',
            'videos': [
                {'$ref': 'https://cds.cern.ch/record/1'},
                {'$ref': 'https://cds.cern.ch/record/2'},
            ],
            'external_system_identifiers': [
                {'schema': 'AVW', 'value': 'AVW.project.2963'},
            ],
            'modified_by': '*****@*****.**',
        }
        assert record == expected

        # Add required fields calculated by post-process tasks.
        record['publication_date'] = '2017-07-04'
        record['date'] = '2017-07-04'
        schema_ref = {'schema': record['$schema']}
        validate(record, schema=schema_ref, types={'array': (list, tuple)})
Пример #12
0
    def _prepare_revision(self, data):
        """Return the ``(timestamp, record)`` pair for one revision.

        The timestamp is parsed from ``data["modification_datetime"]``. When
        ``self.source_type`` is ``"marcxml"`` the record is the dojson
        translation of ``data["marcxml"]``; otherwise ``data["json"]`` is
        returned as is.

        Raises:
            JSONConversionException: wraps any exception raised by
                ``self.dojson_model.do``; the original error is attached
                as ``__cause__``.
            LossyConversion: if ``self.dojson_model.missing`` reports
                anything for the parsed record.
        """
        timestamp = arrow.get(data["modification_datetime"]).datetime

        exception_handlers = {
            UnexpectedValue: migration_exception_handler,
            MissingRequiredField: migration_exception_handler,
            ManualImportRequired: migration_exception_handler,
        }

        if self.source_type == "marcxml":
            marc_record = create_record(data["marcxml"])
            try:
                json_converted_record = self.dojson_model.do(
                    marc_record, exception_handlers=exception_handlers)
            except Exception as e:
                # Chain explicitly so the traceback shows the original error
                # as the direct cause rather than as an error raised while
                # handling another one.
                raise JSONConversionException(e) from e
            missing = self.dojson_model.missing(marc_record)
            if missing:
                raise LossyConversion(missing=missing)
            return timestamp, json_converted_record
        else:
            return timestamp, data["json"]
Пример #13
0
    def dump(self):
        """Translate the MARCXML held in ``self.data`` into JSON.

        Returns:
            A ``(timestamp, record, is_deletable)`` tuple. ``timestamp`` is
            the naive UTC time taken at the start of the call, ``record``
            is the output of ``self.dojson_model.do``, and ``is_deletable``
            is True when ``"d"`` is found in the record's ``"leader"``
            entry.

        Raises:
            RecordModelMissing: if the conversion raises ``AttributeError``.
            LossyConversion: if ``self.ignore_missing`` is falsy and
                ``self.dojson_model.missing`` reports anything.
        """
        timestamp = datetime.datetime.utcnow()

        record_blob = create_record(self.data)
        is_deletable = "d" in record_blob.get("leader", [])

        # MARCXML -> JSON fields translation
        try:
            converted = self.dojson_model.do(
                record_blob, exception_handlers=xml_import_handlers)
        except AttributeError:
            raise RecordModelMissing

        # check for missing rules, unless the caller opted out
        unmatched = (
            None if self.ignore_missing
            else self.dojson_model.missing(record_blob)
        )
        if unmatched:
            raise LossyConversion(missing=unmatched)

        return timestamp, converted, is_deletable
Пример #14
0
def test_required_fields(app):
    """Convert ``videos_video.xml`` and check the resulting video record.

    The expected ``_files`` list is assembled from small tables (one master
    file, five sub-formats, ten frames and one poster frame). After the
    equality check, the fields normally filled in by post-process tasks are
    added and the record is validated against its own ``$schema``.
    """
    fixture_xml = load_fixture_file('videos_video.xml')

    with app.app_context():
        parsed = create_record(fixture_xml)
        record = model.do(parsed)

        master_dir = ('MediaArchive/Video/Masters/Movies/CERN/2017/'
                      'CERN-MOVIE-2017-023/Final_Output/')
        public_dir = ('MediaArchive/Video/Public/Movies/CERN/2017/'
                      'CERN-MOVIE-2017-023/CERN-MOVIE-2017-023-001/')
        frame_name = ('CERN-MOVIE-2017-023-001-posterframe-640x360-'
                      'at-{0}-percent.jpg')

        master_file = {
            'filepath': master_dir + 'CERN-MOVIE-2017-023-001.mov',
            'key': 'CERN-MOVIE-2017-023-001.mov',
            'tags': {
                'media_type': 'video',
                'content_type': 'mov',
                'context_type': 'master',
                'preview': True,
            },
        }

        # (video bitrate, resolution, audio bitrate, preset) per sub-format.
        subformat_files = []
        for bitrate, size, audio, preset in (
                (5872, '1920x1080', 128, '1080p'),
                (2672, '1280x720', 128, '720p'),
                (1436, '853x480', 64, '480p'),
                (836, '640x360', 64, '360p'),
                (386, '426x240', 64, '240p')):
            name = ('CERN-MOVIE-2017-023-001-{0}-kbps-{1}-audio-{2}-kbps-'
                    'stereo.mp4').format(bitrate, size, audio)
            subformat_files.append({
                'filepath': public_dir + name,
                'key': name,
                'tags_to_guess_preset': {
                    'preset': preset,
                    'video_bitrate': bitrate,
                },
                'tags': {
                    'media_type': 'video',
                    'content_type': 'mp4',
                    'context_type': 'subformat',
                },
            })

        # frame-1 .. frame-10 sit at 5 %, 15 %, ..., 95 % of the video.
        frame_files = [
            {
                'filepath': public_dir + frame_name.format(10 * index - 5),
                'key': 'frame-{0}.jpg'.format(index),
                'tags': {
                    'content_type': 'jpg',
                    'context_type': 'frame',
                    'media_type': 'image',
                },
                'tags_to_transform': {'timestamp': 10 * index - 5},
            }
            for index in range(1, 11)
        ]

        # The poster reuses the 5 % frame image.
        poster_file = {
            'filepath': public_dir + frame_name.format(5),
            'key': 'posterframe.jpg',
            'tags': {
                'media_type': 'image',
                'height': '360',
                'width': '640',
                'content_type': 'jpg',
                'context_type': 'poster',
            },
            'tags_to_transform': {'timestamp': 5},
        }

        # Order matters for list equality: the poster comes right after
        # the first frame.
        expected_files = (
            [master_file]
            + subformat_files
            + [frame_files[0], poster_file]
            + frame_files[1:]
        )

        expected = {
            '$schema': 'https://cds.cern.ch/schemas/records/videos/video/'
                       'video-v1.0.0.json',
            '_access': {
                'read': [
                    '*****@*****.**',
                    '*****@*****.**',
                    '*****@*****.**',
                    '*****@*****.**',
                ],
                'update': [
                    '*****@*****.**',
                    '*****@*****.**',
                ],
            },
            '_files': expected_files,
            '_project_id': 'https://cds.cern.ch/record/1',
            'category': 'CERN',
            'contributors': [
                {'name': 'CERN Video Productions', 'role': 'Producer'},
                {'name': 'CERN Video Productions', 'role': 'Director'},
                {
                    'affiliations': (u'CERN',),
                    'email': u'*****@*****.**',
                    'ids': [
                        {'source': 'CERN', 'value': u'755568'},
                        {'source': 'CDS', 'value': u'2090563'},
                    ],
                    'name': 'Madsen, Christoph Martin',
                    'role': 'Director',
                },
                {
                    'affiliations': (u'CERN',),
                    'email': u'*****@*****.**',
                    'ids': [
                        {'source': 'CERN', 'value': u'380837'},
                        {'source': 'CDS', 'value': u'2050975'},
                    ],
                    'name': 'Catapano, Paola',
                    'role': 'Director',
                },
                {
                    'affiliations': (u'CERN',),
                    'email': u'*****@*****.**',
                    'ids': [
                        {'source': 'CERN', 'value': u'755568'},
                        {'source': 'CDS', 'value': u'2090563'},
                    ],
                    'name': 'Madsen, Christoph Martin',
                    'role': 'Editor',
                },
            ],
            'copyright': {'holder': 'CERN', 'year': '2017'},
            'date': '2017-07-04',
            'description': ('Where were you on 4 July 2012, the day in which '
                            'the Higgs boson discovery was announced?'),
            'duration': '00:01:09',
            'keywords': [
                {'name': 'higgs', 'source': 'CERN'},
                {'name': 'anniversary', 'source': 'CERN'},
            ],
            'language': u'en',
            'recid': 2272973,
            'report_number': ['CERN-MOVIE-2017-023-001'],
            'title': {'title': 'Happy 5th anniversary, Higgs boson!'},
            'type': 'MOVIE',
            'external_system_identifiers': [
                {'schema': 'AVW', 'value': 'AVW.clip.3447'},
            ],
            'modified_by': '*****@*****.**',
        }

        assert record == expected

        # Add required fields calculated by post-process tasks.
        record['publication_date'] = '2017-07-04'
        record['doi'] = 'CERN/2272973'
        record['license'] = [
            {'license': 'CC BY 4.0',
             'url': 'https://creativecommons.org/licenses/by/4.0/'}
        ]
        schema_ref = {'schema': record['$schema']}
        validate(record, schema=schema_ref, types={'array': (list, tuple)})