Python parse_module_metadata примеры, press.parsers.parse_module_metadata Python примеры использования

Пример #1

0

Показать файл

def test_publish_revision_with_new_abstract(content_util, persist_util, app,
                                            db_engines, db_tables):
    resources = list([content_util.gen_resource() for x in range(0, 2)])
    module = content_util.gen_module(resources=resources)
    module = persist_util.insert_module(module)

    with element_tree_from_model(module) as xml:
        elm = xml.xpath('//md:abstract', namespaces=COLLECTION_NSMAP)[0]
        elm.text += ' -- appendage'

    metadata = parse_module_metadata(module)

    # Collect control data for non-legacy metadata
    stmt = (db_tables.modules.select().where(
        db_tables.modules.c.moduleid == metadata.id))
    control_metadata = db_engines['common'].execute(stmt).fetchone()

    # TARGET
    with db_engines['common'].begin() as conn:
        (id, version), ident = publish_legacy_page(
            module,
            metadata,
            (
                'user1',
                'test publish',
            ),
            conn,
        )

    # Check for a new abstract insertion
    stmt = (db_tables.modules.select().where(
        db_tables.modules.c.module_ident == ident))
    result = db_engines['common'].execute(stmt).fetchone()
    assert result.abstractid != control_metadata.abstractid

Пример #2

0

Показать файл

Файл: test_module.py Проект: openstax/cnx-press

def test_parse_module_metadata(litezip_valid_litezip):
    module_id = 'm37154'
    # given a Module object,
    model = parse_module(litezip_valid_litezip / module_id)
    # parse the metadata into a CollectionMetadata,
    md = parse_module_metadata(model)
    # which we then test for data point information

    assert md.id == module_id
    assert md.version == '1.2'
    assert md.created == '2010/08/09 14:25:38 -0500'
    assert md.revised == '2011/03/08 18:15:08 -0600'
    assert md.title == ('A Student to Student Intro to IDE Programming '
                        'and CCS4')
    assert md.license_url == 'http://creativecommons.org/licenses/by/3.0/'
    assert md.language == 'en'

    assert md.authors == ('mwjhnsn', 'ww2')
    assert md.maintainers == ('mwjhnsn', 'ww2')
    assert md.licensors == ('mwjhnsn', 'ww2')

    assert md.keywords == (
        'CCSv4',
        'Code Composer Studio',
        'ELEC 220',
        'IDE',
        'MSP 430',
    )
    assert md.subjects == ('Science and Technology', )

    assert md.abstract == ("A basic introduction to how to write "
                           "and debug programs in Code Composer Studio V4.")

Пример #3

0

Показать файл

Файл: litezip.py Проект: openstax/cnx-press

def publish_litezip(struct, submission, db_conn):
    """Publish the contents of a litezip structured set of data.

    :param struct: a litezip struct from (probably from
                   :func:`litezip.parse_litezip`)
    :param submission: a two value tuple containing a userid
                       and submit message
    :type submission: tuple
    :param db_conn: a database connection object
    :type db_conn: :class:`sqlalchemy.engine.Connection`

    """
    # Dissect objects from litezip struct.
    try:
        collection = [x for x in struct if isinstance(x, Collection)][0]
    except IndexError:  # pragma: no cover
        raise NotImplementedError('litezip without collection')

    id_map = {}  # pragma: no cover

    # Parse Collection tree to update the newly published Modules.
    with collection.file.open('rb') as fb:
        xml = etree.parse(fb)

    # Publish the Modules.
    for module in [x for x in struct if isinstance(x, Module)]:
        metadata = parse_module_metadata(module)
        old_id = module.id

        try:
            (id, version), ident = publish_legacy_page(module, metadata,
                                                       submission, db_conn)
            id_map[old_id] = (id, version)
            # Update the Collection tree
            xpath = '//col:module[@document="{}"]'.format(old_id)
            for elm in xml.xpath(xpath, namespaces=COLLECTION_NSMAP):
                elm.attrib['document'] = id
                version_attrib_name = (
                    '{{{}}}version-at-this-collection-version'
                    .format(COLLECTION_NSMAP['cnxorg']))
                legacy_version = convert_version_to_legacy_version(version)
                elm.attrib[version_attrib_name] = legacy_version
        except Unchanged:
            pass  # only publish content that has changed.

    modules_changed = bool(id_map)
    if modules_changed:
        # Rebuild the Collection tree from the newly published Modules.
        with collection.file.open('wb') as fb:
            fb.write(etree.tounicode(xml).encode('utf8'))

    # Maybe publish the Collection.
    metadata = parse_collection_metadata(collection)
    old_id = collection.id
    (id, version), ident = publish_legacy_book(
        collection, metadata, submission, db_conn,
        modules_changed=modules_changed)
    id_map[old_id] = (id, version)

    return id_map

Пример #4

0

Показать файл

    def insert_module(self, model, _derived_from=None):
        # This is validly used here because the tests associated with
        # this parser functions are outside the scope of persistent
        # actions dealing with the database.
        from press.parsers import parse_module_metadata
        metadata = parse_module_metadata(model)

        engine = self.db_engines['common']
        t = self.db_tables

        # Anything inserted with this tool must already have a valid id
        assert metadata.id is not None

        with engine.begin() as trans:
            if self._already_exists(trans, model, metadata):
                return model

            # Insert module metadata
            ident, id = self._insert_module_metadata(
                trans,
                metadata,
                'Module',
                _derived_from=_derived_from,
            )

            # Rewrite the content with the id
            with model.file.open('rb') as fb:
                xml = etree.parse(fb)
            elm = xml.xpath('//md:content-id', namespaces=COLLECTION_NSMAP)[0]
            elm.text = id
            with model.file.open('wb') as fb:
                fb.write(etree.tounicode(xml).encode('utf8'))

            # Insert content files
            with model.file.open('rb') as fb:
                result = trans.execute(t.files.insert().values(
                    file=fb.read(),
                    media_type='text/xml',
                ))
            fileid = result.inserted_primary_key[0]
            result = trans.execute(t.module_files.insert().values(
                module_ident=ident,
                fileid=fileid,
                filename='index.cnxml',
            ))
        # Insert resource files (images, pdfs, etc.)
        for resource in model.resources:
            self._insert_module_file(resource, ident)

        return Module(id, model.file, model.resources)

Пример #5

0

Показать файл

Файл: test_module.py Проект: BryanEliDimas/cnx-press

def test_publish_revision_that_overwrites_existing_resources(
        content_util, persist_util, app, db_engines, db_tables):
    resources = list([content_util.gen_resource() for x in range(0, 2)])
    module = content_util.gen_module(resources=resources)
    module = persist_util.insert_module(module)

    # Replace an existing resource with new file contents the module
    replaced_resource = module.resources.pop()
    new_resource = content_util.gen_resource(
        filename=replaced_resource.filename, )
    module.resources.append(new_resource)
    assert replaced_resource.filename == new_resource.filename

    metadata = parse_module_metadata(module)

    # Collect control data for this current version
    stmt = (db_tables.module_files.join(db_tables.files).select().where(
        db_tables.modules.c.moduleid == metadata.id))
    control_data = db_engines['common'].execute(stmt).fetchall()

    # Ensure the replaced resource really was in the content
    # prior to our publication
    control_files = {x.filename: x for x in control_data}
    replaced_file_record = control_files[replaced_resource.filename]
    assert replaced_file_record.sha1 == replaced_resource.sha1
    assert replaced_file_record.file == replaced_resource.data.read()
    new_resource.data.seek(0)

    # TARGET
    with db_engines['common'].begin() as conn:
        (id, version), ident = publish_legacy_page(
            module,
            metadata,
            (
                'user1',
                'test publish',
            ),
            conn,
        )

    # Check for file insertion
    stmt = (db_tables.module_files.join(db_tables.files).select().where(
        db_tables.module_files.c.module_ident == ident))
    result = db_engines['common'].execute(stmt).fetchall()
    files = {x.filename: x for x in result}
    assert new_resource.filename in files
    assert files[new_resource.filename].sha1 == new_resource.sha1
    assert files[new_resource.filename].file == new_resource.data.read()

Пример #6

0

Показать файл

def test_publish_revision_with_new_resources(content_util, persist_util, app,
                                             db_engines, db_tables):
    resources = list([content_util.gen_resource() for x in range(0, 2)])
    module = content_util.gen_module(resources=resources)
    module = persist_util.insert_module(module)

    # Add a new resouce to the module
    new_resource = content_util.gen_resource()
    module.resources.append(new_resource)

    metadata = parse_module_metadata(module)

    # Collect control data for this current version
    stmt = (db_tables.module_files.join(db_tables.files).select().where(
        db_tables.modules.c.moduleid == metadata.id))
    control_data = db_engines['common'].execute(stmt).fetchall()

    # Ensure the resource is not in the content
    # prior to our publication
    control_files = {x.filename: x for x in control_data}
    assert new_resource.filename not in control_files

    # TARGET
    with db_engines['common'].begin() as conn:
        (id, version), ident = publish_legacy_page(
            module,
            metadata,
            (
                'user1',
                'test publish',
            ),
            conn,
        )

    # Check for file insertion
    stmt = (db_tables.module_files.join(db_tables.files).select().where(
        db_tables.module_files.c.module_ident == ident))
    result = db_engines['common'].execute(stmt).fetchall()
    files = {x.filename: x for x in result}
    assert new_resource.filename in files
    assert files[new_resource.filename].sha1 == new_resource.sha1
    assert files[new_resource.filename].file == new_resource.data.read_bytes()

Пример #7

0

Показать файл

def test_publish_revision_with_created_value_changed(content_util,
                                                     persist_util, app,
                                                     db_engines, db_tables):
    resources = list([content_util.gen_resource() for x in range(0, 2)])
    module = content_util.gen_module(resources=resources)
    module = persist_util.insert_module(module)

    with element_tree_from_model(module) as xml:
        elm = xml.xpath('//md:created', namespaces=COLLECTION_NSMAP)[0]
        actual_created = parse_date(elm.text)
        changed_created = actual_created - (timedelta(days=365) * 8)
        elm.text = changed_created.isoformat()

    metadata = parse_module_metadata(module)

    # Collect control data for non-legacy metadata
    stmt = (db_tables.modules.select().where(
        db_tables.modules.c.moduleid == metadata.id))
    control_metadata = db_engines['common'].execute(stmt).fetchone()
    assert control_metadata.created == actual_created

    # TARGET
    with db_engines['common'].begin() as conn:
        (id, version), ident = publish_legacy_page(
            module,
            metadata,
            (
                'user1',
                'test publish',
            ),
            conn,
        )

    # Check for a new abstract insertion
    stmt = (db_tables.modules.select().where(
        db_tables.modules.c.module_ident == ident))
    result = db_engines['common'].execute(stmt).fetchone()
    assert result.created == control_metadata.created
    assert result.created != changed_created

Пример #8

0

Показать файл

def test_publish_revision_that_is_derived(content_util, persist_util, app,
                                          db_engines, db_tables):
    resources = list([content_util.gen_resource() for x in range(0, 2)])
    module = content_util.gen_module(resources=resources)
    module = persist_util.insert_module(module)

    metadata = parse_module_metadata(module)

    # Derive a copy of the collection
    derived_module = persist_util.derive_from(module)
    derived_metadata = parse_module_metadata(derived_module)

    # Collect control data for non-legacy metadata
    stmt = (db_tables.modules.select().where(
        db_tables.modules.c.moduleid == derived_metadata.id))
    control_metadata = db_engines['common'].execute(stmt).fetchone()

    # Change the module text, to make it publishable.
    index_cnxml = derived_module.file.read_text()
    start_offset = index_cnxml.find('test document')
    derived_module.file.write_text(index_cnxml[:start_offset] +
                                   'TEST DOCUMENT' +
                                   index_cnxml[start_offset + 13:])

    # TARGET
    with db_engines['common'].begin() as conn:
        now = conn.execute('SELECT CURRENT_TIMESTAMP as now').fetchone().now
        (id, version), ident = publish_legacy_page(
            derived_module,
            derived_metadata,
            (
                'user1',
                'test publish',
            ),
            conn,
        )

    # Lookup parent's metadata (ident and authors)
    # for parentage checks against the derived-copy metadata.
    stmt = (db_tables.latest_modules.select().where(
        db_tables.latest_modules.c.moduleid == module.id))
    parent_metadata_result = db_engines['common'].execute(stmt).fetchone()

    # Check core metadata insertion
    stmt = (db_tables.modules.join(db_tables.abstracts).select().where(
        db_tables.modules.c.module_ident == ident))
    result = db_engines['common'].execute(stmt).fetchone()
    assert result.version == '1.2'
    assert result.uuid == control_metadata.uuid
    assert result.major_version == 2
    assert result.minor_version is None
    assert result.abstract == derived_metadata.abstract
    assert result.created == parse_date(metadata.created)
    assert result.revised == now
    assert result.portal_type == 'Module'
    assert result.name == derived_metadata.title
    assert result.licenseid == 13
    assert result.submitter == 'user1'
    assert result.submitlog == 'test publish'
    assert result.authors == list(derived_metadata.authors)
    assert result.maintainers == list(derived_metadata.maintainers)
    assert result.licensors == list(derived_metadata.licensors)
    assert result.google_analytics == GOOGLE_ANALYTICS_CODE

    # Check for derived metadata (parent and parent authors)
    assert result.parent == parent_metadata_result.module_ident
    assert result.parentauthors == parent_metadata_result.authors

    # Check for file insertion
    stmt = (db_tables.module_files.join(db_tables.files).select().where(
        db_tables.module_files.c.module_ident == ident).where(
            db_tables.module_files.c.filename == 'index.cnxml'))
    module_doc = db_engines['common'].execute(stmt).fetchone()
    expected = bytes(
        ('<md:derived-from url="http://cnx.org/content/{}/{}"/>'.format(
            metadata.id, metadata.version)),
        'utf-8',
    )
    assert expected in module_doc.file

Пример #9

0

Показать файл

def test_publish_revision(content_util, persist_util, app, db_engines,
                          db_tables):
    resources = list([content_util.gen_resource() for x in range(0, 2)])
    module = content_util.gen_module(resources=resources)
    module = persist_util.insert_module(module)

    metadata = parse_module_metadata(module)

    # Collect control data for non-legacy metadata
    stmt = (db_tables.modules.select().where(
        db_tables.modules.c.moduleid == metadata.id))
    control_metadata = db_engines['common'].execute(stmt).fetchone()

    # TARGET
    with pytest.raises(Unchanged), db_engines['common'].begin() as conn:
        now = conn.execute('SELECT CURRENT_TIMESTAMP as now').fetchone().now
        (id, version), ident = publish_legacy_page(
            module,
            metadata,
            (
                'user1',
                'test publish',
            ),
            conn,
        )

    # Change the module text, to make it publishable.
    index_cnxml = module.file.read_text()
    start_offset = index_cnxml.find('test document')
    module.file.write_text(index_cnxml[:start_offset] + 'TEST DOCUMENT' +
                           index_cnxml[start_offset + 13:])

    # TARGET - again
    with db_engines['common'].begin() as conn:
        now = conn.execute('SELECT CURRENT_TIMESTAMP as now').fetchone().now
        (id, version), ident = publish_legacy_page(
            module,
            metadata,
            (
                'user1',
                'test publish',
            ),
            conn,
        )

    # Check core metadata insertion
    stmt = (db_tables.modules.select().where(
        db_tables.modules.c.module_ident == ident))
    result = db_engines['common'].execute(stmt).fetchone()
    assert result.version == '1.2'
    assert result.uuid == control_metadata.uuid
    assert result.major_version == 2
    assert result.minor_version is None
    # Check for reuse of the existing abstract
    assert result.abstractid == control_metadata.abstractid
    assert result.created == parse_date(metadata.created)
    assert result.revised == now
    assert result.portal_type == 'Module'
    assert result.name == metadata.title
    assert result.licenseid == 13
    assert result.submitter == 'user1'
    assert result.submitlog == 'test publish'
    assert result.authors == list(metadata.authors)
    assert result.maintainers == list(metadata.maintainers)
    assert result.licensors == list(metadata.licensors)
    assert result.google_analytics == GOOGLE_ANALYTICS_CODE

    # Check subject metadata insertion
    stmt = (db_tables.moduletags.join(db_tables.tags).select().where(
        db_tables.moduletags.c.module_ident == ident))
    results = db_engines['common'].execute(stmt)
    subjects = [x.tag for x in results]
    assert sorted(subjects) == sorted(metadata.subjects)

    # Check keyword metadata insertion
    stmt = (db_tables.modulekeywords.join(db_tables.keywords).select().where(
        db_tables.modulekeywords.c.module_ident == ident))
    results = db_engines['common'].execute(stmt)
    keywords = [x.word for x in results]
    assert sorted(keywords) == sorted(metadata.keywords)

    # Check for file insertion
    stmt = (db_tables.module_files.join(db_tables.files).select().where(
        db_tables.module_files.c.module_ident == ident))
    result = db_engines['common'].execute(stmt).fetchall()
    files = {x.filename: x for x in result}
    assert len(files) == len(resources) + 2  # content files
    assert 'index.cnxml' in files
    assert 'index.cnxml.html' in files

    # Check for resource file insertion
    html_content = files['index.cnxml.html'].file.decode('utf8')
    for resource in resources:
        assert resource.filename in files
        # Check for reference rewrites in the content. This is out of scope
        # for this project, but order of insertion matters in order for
        # the references to be rewritten.
        assert '/resources/{}'.format(resource.sha1) in html_content

Пример #10

0

Показать файл

 def _parse_module_metadata(self, *args, **kwargs):
     # The parser is validly used here because it is unit tested
     # without using this utility.
     from press.parsers import parse_module_metadata
     return parse_module_metadata(*args, **kwargs)

Python parse_module_metadata примеры использования