Пример #1
0
class CreativeWork(Parser):
    """Parser for a work described by a soup-backed context.

    Fields are read from elements selected by ``itemprop`` attributes and
    from ``<dd>`` elements located through the text of their ``<dt>`` label
    (see ``get_dd``).
    """
    schema = tools.RunPython('get_type', ctx)

    title = tools.RunPython('get_title', ctx)
    description = Soup(ctx, 'p', class_='genericfile_description')['#text']
    date_published = tools.ParseDate(
        Soup(ctx, itemprop='datePublished')['#text'])
    date_updated = tools.ParseDate(Soup(ctx, itemprop='dateModified')['#text'])
    # Alternatives are listed in order: the rights link, then the plain text
    # of the 'Rights' entry, then a static None.
    rights = tools.OneOf(tools.RunPython('get_rights_url', ctx),
                         tools.RunPython('get_dd', ctx, 'Rights')['#text'],
                         tools.Static(None))
    language = tools.Try(
        tools.ParseLanguage(Soup(ctx, itemprop='inLanguage')['#text']))

    tags = tools.Map(tools.Delegate(ThroughTags), Soup(ctx,
                                                       itemprop='keywords'))

    identifiers = tools.Map(
        tools.Delegate(WorkIdentifier),
        tools.Try(tools.RunPython('get_dd', ctx, 'Permanent Link')),
    )

    related_agents = tools.Concat(
        tools.Map(tools.Delegate(Creator), Soup(ctx, itemprop='creator')),
        tools.Map(tools.Delegate(Contributor), Soup(ctx,
                                                    itemprop='contributor')),
        tools.Map(tools.Delegate(Publisher), Soup(ctx, itemprop='publisher')),
    )

    class Extra:
        # Each value is the '#text' of the <dd> found for the given label.
        gwu_unit = tools.RunPython('get_dd', ctx, 'GW Unit')['#text']
        related_url = tools.RunPython('get_dd', ctx, 'Related URL')['#text']
        previous_publication_information = tools.RunPython(
            'get_dd', ctx, 'Previous Publication Information')['#text']
        depositor = tools.RunPython('get_dd', ctx, 'Depositor')['#text']
        characterization = tools.RunPython('get_dd', ctx,
                                           'Characterization')['#text']

    def get_type(self, obj):
        """Translate the first ``<div>``'s ``itemtype`` into a schema name.

        Item types missing from the table fall back to 'CreativeWork'.
        """
        return {
            'http://schema.org/CreativeWork': 'CreativeWork',
            'http://schema.org/Article': 'Article',
            'http://schema.org/Book': 'Book',
        }.get(obj.soup.find('div')['itemtype'], 'CreativeWork')

    def get_title(self, obj):
        """Return the text of ``obj.h1`` without its 'label' span.

        The span, when present, is removed from the underlying soup in
        place. A heading without such a span is returned as-is; previously
        this case failed because ``find`` returned ``None``.
        """
        title = obj.h1.soup
        label = title.find('span', class_='label')
        if label is not None:
            label.decompose()
        return title.get_text()

    def get_dd(self, obj, dt):
        """Return the ``<dd>`` following the ``<dt>`` whose string is ``dt``.

        The sibling is wrapped in a SoupXMLDict. Returns None when no
        matching ``<dt>`` is found.
        """
        dt_tag = obj.soup.find('dt', string=dt)
        if dt_tag:
            return SoupXMLDict(soup=dt_tag.find_next_sibling('dd'))
        return None

    def get_rights_url(self, obj):
        """Return the href of the parent of the 'glyphicon-new-window' icon.

        The icon is searched for inside the 'Rights' ``<dd>``.
        """
        dd = self.get_dd(obj, 'Rights')
        # NOTE(review): this raises when the entry or the icon is missing;
        # presumably that is what lets tools.OneOf try the next alternative,
        # so it is deliberately left unguarded -- confirm.
        return dd.soup.find('i', class_='glyphicon-new-window').parent['href']
Пример #2
0
class Registration(Parser):
    """Registration parser whose values are looked up in ctx through FIELDS.

    ``FIELDS`` maps the readable labels used below to the keys of the
    incoming record.
    """
    title = ctx[FIELDS['title']]
    description = ctx[FIELDS['summary']]
    # Both dates are read from the timestamp of the same registration date.
    date_published = tools.ParseDate(ctx[FIELDS['registration date']].timestamp)
    date_updated = tools.ParseDate(ctx[FIELDS['registration date']].timestamp)
    related_agents = tools.Concat(
        tools.Delegate(
            PrincipalInvestigator, ctx[FIELDS['primary investigator']]
        ),
        tools.Delegate(OtherInvestigator, ctx[FIELDS['other investigator']]),
        tools.Map(
            tools.Delegate(AdditionalInvestigator),
            tools.RunPython(
                'split_names', ctx[FIELDS['additional investigators']]
            ),
        ),
    )
    identifiers = tools.Map(
        tools.Delegate(WorkIdentifier),
        tools.RunPython('get_link', ctx.id),
    )

    class Extra:
        # Raw record values, one attribute per FIELDS label.
        registration_date = ctx[FIELDS['registration date']]
        questions_and_objectives = ctx[FIELDS['questions and objectives']]
        study_type = ctx[FIELDS['study type']]
        study_type_detail = ctx[FIELDS['study type other']]
        contact_details = ctx[FIELDS['contact details']]
        participating_institutions = ctx[FIELDS['participating institutions']]
        countries_of_recruitment = ctx[FIELDS['countries of recruitment']]
        funders = ctx[FIELDS['funders']]
        problems_studied = ctx[FIELDS['health conditions or problems studied']]
        patient_population = ctx[FIELDS['patient population']]
        interventions = ctx[FIELDS['interventions']]
        inclusion_criteria = ctx[FIELDS['inclusion criteria']]
        exclusion_criteria = ctx[FIELDS['exclusion criteria']]
        control_or_comparators = ctx[FIELDS['control or comparators']]
        primary_outcomes = ctx[FIELDS['primary outcomes']]
        key_secondary_outcomes = ctx[FIELDS['key secondary outcomes']]
        target_sample_size = ctx[FIELDS['target sample size']]
        recruitment_status = ctx[FIELDS['recruitment status']]
        other_recruitment_status = ctx[FIELDS['other recruitment status']]
        first_enrollment_date = ctx[FIELDS['first enrollment date']]
        expected_enrollment_completion_date = ctx[
            FIELDS['expected enrollment completion date']
        ]
        expected_research_completion_date = ctx[
            FIELDS['expected research completion date']
        ]
        ethical_approval = ctx[FIELDS['ethical approval']]
        ethical_approval_details = ctx[FIELDS['ethical approval details']]
        ethical_committee_judgment = ctx[FIELDS['ethical committee judgment']]
        data = ctx[FIELDS['data']]
        published_paper = ctx[FIELDS['published paper identifier']]
        study_website = ctx[FIELDS['study website']]
        study_results = ctx[FIELDS['study results']]

    def get_link(self, id):
        """Return LINK_FORMAT with ``id`` substituted into it."""
        link = LINK_FORMAT.format(id)
        return link

    def split_names(self, obj):
        """Split ``obj`` on commas, or return None when ``obj`` is falsy."""
        return obj.split(',') if obj else None
Пример #3
0
class Person(Parser):
    """Person parser whose name components all come from ``ctx.author``."""

    # Each component is a different attribute of a ParseName(ctx.author)
    # chain.
    given_name = tools.ParseName(ctx.author).first
    family_name = tools.ParseName(ctx.author).last
    additional_name = tools.ParseName(ctx.author).middle
    suffix = tools.ParseName(ctx.author).suffix

    # NOTE(review): here the Try(ctx.email) chain is passed to Delegate and
    # Map receives no source chain, whereas related_agents below passes its
    # source chain as Map's second argument -- confirm this is intended.
    identifiers = tools.Map(
        tools.Delegate(AgentIdentifier, tools.Try(ctx.email)))
    related_agents = tools.Map(tools.Delegate(IsAffiliatedWith),
                               tools.Try(ctx.institution))
Пример #4
0
class FunderRelation(Parser):
    """Relation parser with the fixed schema 'Funder'.

    The agent is delegated to FunderAgent with the whole context. The
    awards source is the ``get_award`` call, which is wrapped in
    ``tools.Try``.
    """
    schema = 'Funder'

    agent = tools.Delegate(FunderAgent, ctx)
    awards = tools.Map(
        tools.Delegate(ThroughAwards),
        tools.Try(tools.RunPython('get_award', ctx)),
    )

    def get_award(self, obj):
        """Return ``obj`` unchanged after looking up its 'awardURI' key.

        The looked-up value is discarded; the subscript is performed only
        so that a failing lookup raises from here.
        """
        _ = obj['awardURI']
        return obj
Пример #5
0
class Preprint(Parser):
    """Preprint parser reading 'DC.*', 'og:*' and 'citation_*' keys of ctx."""
    title = tools.Try(ctx['DC.Title'])
    description = tools.Try(ctx['DC.Description'])
    # is_deleted
    date_published = tools.ParseDate(tools.Try(ctx['article:published_time']))
    date_updated = tools.ParseDate(tools.Try(ctx['DC.Date']))
    # free_to_read_type
    # free_to_read_date
    rights = tools.Try(ctx['DC.Rights'])
    language = tools.Try(ctx['DC.Language'])

    # A static 'Biology' entry is supplied alongside the 'subject-areas'
    # values.
    subjects = tools.Map(
        tools.Delegate(ThroughSubjects),
        tools.Static('Biology'),
        tools.Subjects(tools.Try(ctx['subject-areas'])),
    )
    tags = tools.Map(
        tools.Delegate(ThroughTags),
        tools.Try(ctx['category']),
        tools.Try(ctx['subject-areas']),
    )

    identifiers = tools.Map(
        tools.Delegate(WorkIdentifier),
        tools.Try(ctx['og:url']),
        ctx['citation_public_url'],
        ctx['citation_doi'],
    )

    related_agents = tools.Concat(
        tools.Map(tools.Delegate(Publisher), tools.Try(ctx['DC.Publisher'])),
        tools.Map(
            tools.Delegate(Creator),
            tools.RunPython('get_contributors', ctx),
        ),
    )

    # related_works

    class Extra:
        identifiers = ctx['DC.Identifier']
        access_rights = ctx['DC.AccessRights']

    def get_contributors(self, link):
        """Build one dict per author from the parallel 'citation_*' fields.

        Each of 'citation_author', 'citation_author_email' and
        'citation_author_institution' may be a list, a single value or
        absent. The three are combined position by position, and shorter
        ones are padded with None.
        """
        def as_list(key):
            # Missing key -> empty list; a lone value -> one-element list.
            value = link.get(key, [])
            return value if isinstance(value, list) else [value]

        rows = itertools.zip_longest(
            as_list('citation_author'),
            as_list('citation_author_email'),
            as_list('citation_author_institution'),
        )
        return [
            {'author': author, 'institution': institution, 'email': email}
            for author, email, institution in rows
        ]
Пример #6
0
class MODSAgent(Parser):
    """Agent parser for a name element keyed with 'mods:*' entries.

    The schema is chosen per record by ``get_agent_schema``.
    """
    schema = tools.RunPython('get_agent_schema', ctx)

    # Alternatives in order: the display form, then the joined name parts.
    name = tools.OneOf(
        tools.RunPython(force_text, ctx['mods:displayForm']),
        tools.RunPython('squash_name_parts', ctx),
    )

    related_agents = tools.Map(
        tools.Delegate(IsAffiliatedWith),
        tools.Concat(
            tools.Try(
                tools.Filter(
                    lambda x: bool(x),
                    tools.RunPython(force_text, ctx['mods:affiliation']),
                )
            )
        ),
    )

    identifiers = tools.Map(
        tools.Delegate(MODSAgentIdentifier),
        tools.Unique(
            tools.Map(
                tools.Try(tools.IRI(), exceptions=(ValueError, )),
                tools.Map(
                    tools.RunPython(force_text),
                    tools.Filter(
                        # Keeps only entries that do not contain 'invalid'.
                        lambda obj: 'invalid' not in obj,
                        tools.Try(ctx['mods:nameIdentifier']),
                    ),
                ),
            )
        ),
    )

    class Extra:
        name_type = tools.Try(ctx['@type'])
        name_part = tools.Try(ctx['mods:namePart'])
        affiliation = tools.Try(ctx['mods:affiliation'])
        description = tools.Try(ctx['mods:description'])
        display_form = tools.Try(ctx['mods:displayForm'])
        etal = tools.Try(ctx['mods:etal'])
        name_identifier = tools.Try(ctx['mods:nameIdentifier'])

    def squash_name_parts(self, name):
        """Join the text of every 'mods:namePart' entry with single spaces."""
        return ' '.join(
            force_text(part) for part in get_list(name, 'mods:namePart')
        )

    def get_agent_schema(self, obj):
        """Pick the agent schema from the name's '@type'.

        'personal' and 'conference' map to fixed schemas. Every other type
        is guessed from the joined name parts, and 'corporate' names use
        'organization' as the guesser's default.
        """
        name_type = obj.get('@type')
        if name_type == 'personal':
            return 'person'
        if name_type == 'conference':
            return 'organization'
        # TODO SHARE-718
        # if name_type == 'family':
        #    return 'family'
        guesser = (
            GuessAgentTypeLink(default='organization')
            if name_type == 'corporate'
            else GuessAgentTypeLink()
        )
        return guesser.execute(self.squash_name_parts(obj))
Пример #7
0
class POContributorAgent(Parser):
    """Agent parser with the fixed schema 'Person', named from ctx.poName."""
    schema = 'Person'

    name = ctx.poName

    # The IRI built from ctx.poEmail is wrapped in Try, listing ValueError.
    identifiers = tools.Map(
        tools.Delegate(AgentIdentifier),
        tools.Try(tools.IRI(ctx.poEmail), exceptions=(ValueError, )))

    # The whole context is passed through a filter that tests for
    # 'awardeeName'.
    related_agents = tools.Map(tools.Delegate(IsAffiliatedWith),
                               tools.Filter(lambda x: 'awardeeName' in x, ctx))

    class Extra:
        # Raw poName / poEmail values, each wrapped in Try.
        po_name = tools.Try(ctx.poName)
        po_email = tools.Try(ctx.poEmail)
Пример #8
0
class CreativeWork(Parser):
    """Work parser titled from ctx.title; every relation uses the whole ctx."""
    title = ctx.title

    identifiers = tools.Map(tools.Delegate(WorkIdentifier), ctx)

    # Funder and contributor relations receive ctx directly; the
    # AgentWorkRelation source is ctx passed through an 'awardeeName' filter.
    related_agents = tools.Concat(
        tools.Map(tools.Delegate(FunderRelation), ctx),
        tools.Map(tools.Delegate(ContributorRelation), ctx),
        tools.Map(tools.Delegate(AgentWorkRelation),
                  tools.Filter(lambda x: 'awardeeName' in x, ctx)))

    date_updated = tools.ParseDate(ctx.date)

    class Extra:
        public_access_mandate = ctx.publicAccessMandate
Пример #9
0
class FunderAgent(Parser):
    """Agent parser named from ctx.funderName or, failing that,
    ctx.contributorName.
    """
    # The agent type is guessed from the same name alternatives, with
    # 'organization' passed as the default.
    schema = tools.GuessAgentType(tools.OneOf(ctx.funderName,
                                              ctx.contributorName),
                                  default='organization')

    name = tools.OneOf(ctx.funderName, ctx.contributorName)

    # Identifier alternatives in order: funderIdentifier, the text of
    # nameIdentifier, then a static None. The IRI step is wrapped in Try,
    # listing ValueError.
    identifiers = tools.Map(
        tools.Delegate(AgentIdentifier),
        tools.Try(tools.IRI(
            tools.OneOf(ctx.funderIdentifier,
                        tools.RunPython(force_text, ctx.nameIdentifier),
                        tools.Static(None))),
                  exceptions=(ValueError, )))

    class Extra:
        # Raw identifier fields and their '@'-prefixed attributes, each
        # wrapped in Try.
        name_identifier = tools.Try(ctx.nameIdentifier)
        name_identifier_scheme = tools.Try(
            ctx.nameIdentifier['@nameIdentifierScheme'])
        name_identifier_scheme_uri = tools.Try(
            ctx.nameIdentifier['@schemeURI'])

        funder_identifier = tools.Try(ctx.funderIdentifier)
        funder_identifier_type = tools.Try(ctx.funderIdentifierType)

        contributor_type = tools.Try(ctx.contributorType)
Пример #10
0
class PIContactAgent(Parser):
    """Agent parser with the fixed schema 'Person', named from ctx.PI_NAME.

    Affiliations are built from ``ctx['org_ctx']``. The second affiliation
    source is whatever ``if_dept`` returns.
    """
    schema = 'Person'

    name = ctx.PI_NAME
    related_agents = tools.Concat(
        tools.Map(tools.Delegate(FullIsAffiliatedWith), ctx['org_ctx']),
        tools.Map(
            tools.Delegate(DeptIsAffiliatedWith),
            RunPython('if_dept', ctx),
        ),
    )

    def if_dept(self, ctx):
        """Return a copy of ``ctx['org_ctx']`` when it has an 'ORG_DEPT'.

        The copy carries an extra ``'hash_breaker': True`` entry. None is
        returned when 'ORG_DEPT' is falsy.
        """
        org = ctx['org_ctx']
        if not org['ORG_DEPT']:
            return None
        return dict(org, hash_breaker=True)

    class Extra:
        pi_id = RunPython(filter_nil, ctx.PI_ID)
Пример #11
0
class OAIRelatedWork(Parser):
    """Work parser with the fixed schema 'CreativeWork'.

    The context itself is used both as the identifier source and as the
    ``identifier`` extra.
    """
    schema = 'CreativeWork'

    identifiers = tools.Map(tools.Delegate(OAIWorkIdentifier), ctx)

    class Extra:
        identifier = ctx
Пример #12
0
class FundingAgent(Parser):
    """Agent parser named from ctx.sponsorName."""
    # The agent type is guessed from the name, with 'organization' passed
    # as the default.
    schema = tools.GuessAgentType(ctx.sponsorName, default='organization')

    name = ctx.sponsorName

    # NOTE(review): Try wraps only the sponsorIdentifier lookup, so the IRI
    # step itself is not inside a Try here -- confirm that is intended.
    identifiers = tools.Map(tools.Delegate(AgentIdentifier),
                            tools.IRI(tools.Try(ctx.sponsorIdentifier)))
Пример #13
0
class Person(Parser):
    """Person parser reading the embedded user of a contributor record.

    Each name field lists two alternatives: the embedded user's attributes,
    then the ``meta`` block of the first entry in ``embeds.users.errors``.
    """
    given_name = tools.OneOf(
        ctx.embeds.users.data.attributes.given_name,
        ctx.embeds.users.errors[0].meta.given_name,
    )
    family_name = tools.OneOf(
        ctx.embeds.users.data.attributes.family_name,
        ctx.embeds.users.errors[0].meta.family_name,
    )
    additional_name = tools.OneOf(
        ctx.embeds.users.data.attributes.middle_names,
        ctx.embeds.users.errors[0].meta.middle_names,
    )
    suffix = tools.OneOf(
        ctx.embeds.users.data.attributes.suffix,
        ctx.embeds.users.errors[0].meta.suffix,
    )

    # The html link goes through ``registered`` before it is used.
    identifiers = tools.Map(
        tools.Delegate(AgentIdentifier),
        tools.RunPython('registered', ctx.embeds.users.data.links.html),
        tools.Try(ctx.embeds.users.data.links.profile_image),
    )

    class Extra:
        locale = tools.Try(ctx.embeds.users.data.attributes.locale)
        date_registered = tools.Try(
            ctx.embeds.users.data.attributes.date_registered
        )
        active = tools.Try(ctx.embeds.users.data.attributes.active)
        timezone = tools.Try(ctx.embeds.users.data.attributes.timezone)

    def registered(self, context):
        """Return ``context``, or None for an unregistered contributor.

        The flag is read from this parser's own context, not from the
        argument.
        """
        unregistered = self.context['attributes']['unregistered_contributor']
        return None if unregistered else context
Пример #14
0
class Preprint(Parser):
    """Preprint parser reading 'dc:*' entries and the description of
    ctx.item.
    """
    title = ctx.item['dc:title']
    description = ctx.item.description
    # Both dates are read from the same 'dc:date' entry.
    date_published = tools.ParseDate(ctx.item['dc:date'])
    date_updated = tools.ParseDate(ctx.item['dc:date'])

    # The only subject source is the static value 'Biology'.
    subjects = tools.Map(
        tools.Delegate(ThroughSubjects),
        tools.Concat(tools.Static('Biology'))
    )

    identifiers = tools.Map(tools.Delegate(WorkIdentifier), ctx.item['dc:identifier'])

    related_agents = tools.Concat(
        tools.Delegate(Publisher, ctx.item['dc:publisher']),
        tools.Map(tools.Delegate(Creator), ctx.item['dc:creator']),
    )
Пример #15
0
class PIAgent(Parser):
    """Agent parser with the fixed schema 'Person', named from ctx.PI_NAME."""
    schema = 'Person'

    name = ctx.PI_NAME
    related_agents = tools.Map(tools.Delegate(IsAffiliatedWith), ctx['org_ctx'])

    class Extra:
        # ctx.PI_ID is passed through filter_nil before being stored.
        pi_id = RunPython(filter_nil, ctx.PI_ID)
Пример #16
0
class ContributorAgent(Parser):
    """Agent parser with the fixed schema 'Person'.

    Names are read from ctx.piLastName and ctx.piFirstName.
    """
    schema = 'Person'

    family_name = ctx.piLastName
    given_name = ctx.piFirstName

    # The whole context is passed through a filter that tests for
    # 'awardeeName'.
    related_agents = tools.Map(tools.Delegate(IsAffiliatedWith),
                               tools.Filter(lambda x: 'awardeeName' in x, ctx))
Пример #17
0
class Agent(Parser):
    """Agent parser named from ctx.name, with its type guessed from that
    name.
    """
    schema = tools.GuessAgentType(ctx.name)

    name = ctx.name

    related_agents = tools.Map(tools.Delegate(IsAffiliatedWith),
                               tools.Try(ctx.affiliation))

    # The sameAs and email values (each wrapped in Try) go through
    # tools.IRI() before being handed to AgentIdentifier.
    identifiers = tools.Map(
        tools.Delegate(AgentIdentifier),
        tools.Map(tools.IRI(), tools.Try(ctx.sameAs), tools.Try(ctx.email)))

    class Extra:
        givenName = tools.Try(ctx.givenName)
        familyName = tools.Try(ctx.familyName)
        # NOTE(review): the attribute is spelled 'additonalName' while the
        # source field is 'additionalName'; renaming it would change the
        # emitted key, so it is left as-is -- confirm whether consumers
        # rely on it.
        additonalName = tools.Try(ctx.additionalName)
        name = tools.Try(ctx.name)
Пример #18
0
class Project(CreativeWork):
    """CreativeWork subclass flagged ``is_root`` that adds related works and
    agents.
    """
    is_root = True
    related_works = tools.Map(tools.Delegate(IsPartOf),
                              tools.Try(ctx.children))

    # Contributors are split on attributes.bibliographic: truthy entries go
    # to Creator, falsy entries to Contributor. Institutions (wrapped in
    # Try) go to AgentWorkRelation.
    related_agents = tools.Concat(
        tools.Map(
            tools.Delegate(Creator),
            tools.Filter(lambda x: x['attributes']['bibliographic'],
                         ctx.contributors)),
        tools.Map(
            tools.Delegate(Contributor),
            tools.Filter(lambda x: not x['attributes']['bibliographic'],
                         ctx.contributors)),
        tools.Map(tools.Delegate(AgentWorkRelation),
                  tools.Try(ctx.institutions)),
    )
Пример #19
0
class FullAwardeeAgent(AwardeeAgent):
    """AwardeeAgent variant whose affiliation source is ``if_dept``'s
    result.
    """

    related_agents = tools.Map(
        tools.Delegate(DeptIsAffiliatedWith),
        RunPython('if_dept', ctx),
    )

    def if_dept(self, ctx):
        """Return ``ctx`` when its 'ORG_DEPT' is truthy, otherwise None."""
        return ctx if ctx['ORG_DEPT'] else None
Пример #20
0
class CreativeWork(Parser):
    """Work parser reading ``ctx.attributes`` and ``ctx.links``.

    The commented-out names below are fields this parser does not populate.
    """
    title = ctx.attributes.title
    description = ctx.attributes.description
    is_deleted = tools.Static(False)
    # date_published =
    date_updated = tools.ParseDate(ctx.attributes.date_modified)
    # free_to_read_type =
    # free_to_read_date =
    # rights = tools.Try(ctx.attributes.node_license)  Doesn't seem to have any useful information
    # language =

    identifiers = tools.Map(tools.Delegate(WorkIdentifier), ctx.links.html,
                            ctx.links.self)

    # Tag sources are the category followed by the tags attribute.
    tags = tools.Map(tools.Delegate(ThroughTags), ctx.attributes.category,
                     ctx.attributes.tags)

    class Extra:
        date_created = tools.ParseDate(ctx.attributes.date_created)
Пример #21
0
class PIContributorAgent(Parser):
    """Agent parser with the fixed schema 'Person', built from the pi*
    fields.
    """
    schema = 'Person'

    family_name = ctx.piLastName
    given_name = ctx.piFirstName
    # NOTE(review): 'piMiddeInitial' is spelled this way in both places it
    # is read; presumably it mirrors the upstream field name -- verify
    # before "correcting" it.
    additional_name = tools.Try(ctx.piMiddeInitial)

    # The whole context is passed through a filter that tests for
    # 'awardeeName'.
    related_agents = tools.Map(tools.Delegate(IsAffiliatedWith),
                               tools.Filter(lambda x: 'awardeeName' in x, ctx))

    # The IRI built from ctx.piEmail is wrapped in Try, listing ValueError.
    identifiers = tools.Map(
        tools.Delegate(AgentIdentifier),
        tools.Try(tools.IRI(ctx.piEmail), exceptions=(ValueError, )))

    class Extra:
        # Raw pi* values; only the middle initial and email are wrapped in
        # Try.
        pi_last_name = ctx.piLastName
        pi_first_name = ctx.piFirstName
        pi_middle_initial = tools.Try(ctx.piMiddeInitial)
        pi_email = tools.Try(ctx.piEmail)
class Registration(Parser):
    """Registration parser reading the 'general-information' and
    'additional-trial-info' sections of ctx.
    """
    title = tools.Try(ctx['general-information']['title'])
    description = tools.Try(ctx['additional-trial-info']['abstract'])
    date_updated = tools.ParseDate(tools.Try(ctx['general-information']['last-updated']))
    date_published = tools.ParseDate(tools.Try(ctx['general-information']['published-at']))
    related_agents = tools.Map(tools.Delegate(Creator), tools.Try(ctx.pi))
    identifiers = tools.Map(
        tools.Delegate(WorkIdentifier),
        tools.Try(tools.IRI(ctx['general-information']['url'])),
    )
    # Subjects and tags both start from the keywords run through
    # process_keywords; tags additionally take the status and jel-code
    # entries.
    subjects = tools.Map(
        tools.Delegate(ThroughSubjects),
        tools.Subjects(
            tools.RunPython(
                process_keywords,
                tools.Try(ctx['additional-trial-info']['keywords']),
            )
        )
    )
    tags = tools.Map(
        tools.Delegate(ThroughTags),
        tools.Concat(
            tools.RunPython(
                process_keywords,
                tools.Try(ctx['additional-trial-info']['keywords']),
            ),
            tools.Try(ctx['additional-trial-info']['status']),
            tools.Try(ctx['additional-trial-info']['jel-code'])
        )
    )

    class Extra:
        # Whole top-level sections of the record, each wrapped in Try.
        general_information = tools.Try(ctx['general-information'])
        additional_trial_information = tools.Try(ctx['additional-trial-info'])
        publication_data = tools.Try(ctx['data-publication'])
        primary_investigator = tools.Try(ctx['pi'])
        interventions = tools.Try(ctx['interventions'])
        outcomes = tools.Try(ctx['outcomes'])
        experimental_design = tools.Try(ctx['experimental-design'])
        experimental_characteristics = tools.Try(ctx['experimental-characteristics'])
        supporting_document_material = tools.Try(ctx['supporting-doc-material'])
        post_trial = tools.Try(ctx['post-trial'])
        reports_papers = tools.Try(ctx['reports-papers'])
Пример #23
0
class DataCenterAgent(Parser):
    """Agent parser named from ctx.Data_Center_Name.Long_Name."""
    # The agent type is guessed from the long name, with 'organization'
    # passed as the default.
    schema = tools.GuessAgentType(
        ctx.Data_Center_Name.Long_Name,
        default='organization'
    )

    name = ctx.Data_Center_Name.Long_Name
    related_agents = tools.Map(tools.Delegate(IsAffiliatedWith), tools.Try(ctx.Personnel))

    class Extra:
        data_center_short_name = ctx.Data_Center_Name.Short_Name
Пример #24
0
class DataSet(Parser):
    """Data set parser reading ``ctx.record.metadata.DIF`` and the record
    header.
    """
    title = tools.Join(tools.Try(ctx.record.metadata.DIF.Entry_Title))
    description = tools.Try(ctx.record.metadata.DIF.Summary.Abstract)

    related_agents = tools.Map(
        tools.Delegate(AgentWorkRelation),
        tools.Try(ctx.record.metadata.DIF.Data_Center),
    )

    tags = tools.Map(
        tools.Delegate(ThroughTags),
        tools.Try(ctx.record.metadata.DIF.Metadata_Name),
        tools.Try(ctx.record.header.setSpec),
    )

    identifiers = tools.Map(
        tools.Delegate(WorkIdentifier),
        tools.Try(ctx.record.metadata.DIF),
    )

    date_updated = tools.ParseDate(ctx.record.header.datestamp)

    # The header's '@status' attribute decides deletion; see check_status.
    is_deleted = tools.RunPython(
        'check_status',
        tools.Try(ctx.record.header['@status']),
    )

    class Extra:
        status = tools.Try(ctx.record.header['@status'])
        entry_id = tools.Try(ctx.record.metadata.DIF.Entry_ID)
        metadata_name = tools.Try(ctx.record.metadata.DIF.Metadata_Name)
        metadata_version = tools.Try(ctx.record.metadata.DIF.Metadata_Version)
        last_dif_revision_date = tools.Try(
            ctx.record.metadata.DIF.Last_DIF_Revision_Date
        )
        set_spec = ctx.record.header.setSpec

    def check_status(self, status):
        """Return True only when ``status`` equals 'deleted'."""
        return status == 'deleted'
Пример #25
0
class ContributorAgent(Parser):
    """Agent parser named from ctx.creatorName or, failing that,
    ctx.contributorName.
    """
    # Schema alternatives in order: a guess based on get_agent_type(ctx,
    # person=False) with 'organization' passed as the default, then a guess
    # based on the agent's name.
    schema = tools.OneOf(
        tools.GuessAgentType(tools.RunPython(get_agent_type, ctx,
                                             person=False),
                             default='organization'),
        tools.GuessAgentType(tools.OneOf(ctx.creatorName,
                                         ctx.contributorName)))

    name = tools.OneOf(ctx.creatorName, ctx.contributorName)
    # The inner Map over the text of nameIdentifier is wrapped in Try,
    # listing ValueError.
    identifiers = tools.Map(
        tools.Delegate(AgentIdentifier),
        tools.Try(tools.Map(tools.IRI(ctx),
                            tools.RunPython(force_text, ctx.nameIdentifier)),
                  exceptions=(ValueError, )))
    # Affiliation text is passed through a filter that keeps truthy values.
    related_agents = tools.Map(
        tools.Delegate(IsAffiliatedWith),
        tools.Concat(
            tools.Try(
                tools.Filter(lambda x: bool(x),
                             tools.RunPython(force_text, ctx.affiliation)))))

    class Extra:
        name_identifier = tools.Try(ctx.nameIdentifier)
        name_identifier_scheme = tools.Try(
            ctx.nameIdentifier['@nameIdentifierScheme'])
        name_identifier_scheme_uri = tools.Try(
            ctx.nameIdentifier['@schemeURI'])

        contributor_type = tools.Try(ctx.contributorType)

        # v.4 new givenName and familyName properties
        # Each lists the creator attribute, then the contributor attribute,
        # then a static None.
        given_name = tools.OneOf(ctx.creatorName['@givenName'],
                                 ctx.contributorName['@givenName'],
                                 tools.Static(None))
        family_name = tools.OneOf(ctx.creatorName['@familyName'],
                                  ctx.contributorName['@familyName'],
                                  tools.Static(None))
Пример #26
0
class Preprint(osf.Project):
    """osf.Project subclass that redefines the work fields for a preprint
    record.
    """
    description = tools.Try(ctx.attributes.abstract)
    date_updated = tools.ParseDate(ctx.attributes.date_modified)
    date_published = tools.ParseDate(ctx.attributes.date_created)
    # NOTE: OSF has a direct mapping to SHARE's taxonomy. Subjects() is not needed
    subjects = tools.Map(tools.Delegate(ThroughSubjects),
                         ctx.attributes.subjects)
    identifiers = tools.Map(tools.Delegate(WorkIdentifier), ctx.links.self,
                            ctx.links.html, tools.Try(ctx.links.doi))
    tags = tools.Map(tools.Delegate(ThroughTags),
                     tools.Try(ctx.attributes.tags))
    rights = tools.Try(ctx.attributes.node_license)

    # related_works is fixed to a static empty list here.
    related_works = tools.Static([])
    # Contributors are split on attributes.bibliographic: truthy entries go
    # to osf.Creator, falsy entries to osf.Contributor.
    related_agents = tools.Concat(
        tools.Map(
            tools.Delegate(osf.Creator),
            tools.Filter(lambda x: x['attributes']['bibliographic'],
                         ctx.contributors)),
        tools.Map(
            tools.Delegate(osf.Contributor),
            tools.Filter(lambda x: not x['attributes']['bibliographic'],
                         ctx.contributors)),
    )
Пример #27
0
class CreativeWork(Parser):
    """Work parser for a record exposing title, uris, contributors and
    similar keys.

    Every field except ``title`` is wrapped in ``tools.Try``.
    """
    title = ctx.title
    description = tools.Try(ctx.description)
    is_deleted = tools.RunPython('_is_deleted', tools.Try(ctx.otherProperties))
    date_updated = tools.ParseDate(tools.Try(ctx.providerUpdatedDateTime))
    rights = tools.Join(tools.Try(ctx.licenses.uri))

    # Note: this is only taking the first language in the case of multiple languages
    language = tools.ParseLanguage(tools.Try(ctx.languages[0]))

    related_agents = tools.Concat(
        tools.Map(tools.Delegate(Creator), tools.Try(ctx.contributors)),
        tools.Map(tools.Delegate(Publisher), tools.Try(ctx.publisher)),
        tools.Map(tools.Delegate(Funder), tools.Try(ctx.sponsorships)),
    )

    # All four URI groups are concatenated and passed through ``unique``
    # before the IRI step.
    identifiers = tools.Map(
        tools.Delegate(WorkIdentifier),
        tools.Map(
            tools.IRI(),
            tools.RunPython(
                'unique',
                tools.Concat(
                    tools.Try(ctx.uris.canonicalUri),
                    tools.Try(ctx.uris.providerUris),
                    tools.Try(ctx.uris.descriptorUris),
                    tools.Try(ctx.uris.objectUris),
                ),
            ),
        ),
    )

    subjects = tools.Map(
        tools.Delegate(ThroughSubjects),
        tools.Subjects(tools.Try(ctx.subjects)),
    )

    tags = tools.Map(
        tools.Delegate(ThroughTags),
        tools.Try(ctx.tags),
        tools.Try(ctx.subjects),
    )

    class Extra:
        """
        Fields that the base parser combines are listed again here one by one,
        matching their original entries, so the raw data structure is kept.
        """
        freeToRead = tools.Try(ctx.freeToRead)
        languages = tools.Try(ctx.languages)
        licenses = tools.Try(ctx.licenses)
        otherProperties = tools.Try(ctx.otherProperties)
        publisher = tools.Try(ctx.publisher)
        subjects = tools.Try(ctx.subjects)
        sponsorships = tools.Try(ctx.sponsorships)
        tags = tools.Try(ctx.tags)
        uris = tools.Try(ctx.uris)
        version = tools.Try(ctx.version)

    def unique(self, items):
        """Return the distinct ``items`` as a sorted list."""
        return sorted(set(items))

    def _is_deleted(self, properties):
        """Report whether the first 'status' property lists 'deleted'.

        Returns False when ``properties`` is empty or None, or when no
        property is named 'status'.
        """
        status_prop = next(
            (prop for prop in properties or [] if prop['name'] == 'status'),
            None,
        )
        if status_prop is None:
            return False
        return 'deleted' in status_prop['properties'].get('status', [])
Пример #28
0
class HostAgent(Parser):
    """Agent parser named from ctx.contributorName."""
    # The agent type is guessed from the name, with 'organization' passed
    # as the default.
    # NOTE(review): the name is wrapped in Try below but not here --
    # confirm the schema guess is meant to fail when contributorName is
    # missing.
    schema = tools.GuessAgentType(ctx.contributorName, default='organization')

    name = tools.Try(ctx.contributorName)

    # The IRI built from the text of nameIdentifier is wrapped in Try,
    # listing InvalidIRI.
    identifiers = tools.Map(
        tools.Delegate(AgentIdentifier),
        tools.Try(tools.IRI(tools.RunPython(force_text, ctx.nameIdentifier)),
                  exceptions=(InvalidIRI, )))

    class Extra:
        name_identifier = tools.Try(ctx.nameIdentifier)
        name_identifier_scheme = tools.Try(
            ctx.nameIdentifier['@nameIdentifierScheme'])
        name_identifier_scheme_uri = tools.Try(
            ctx.nameIdentifier['@schemeURI'])

        contributor_type = tools.Try(ctx.contributorType)
Пример #29
0
class Dataset(Parser):
    """Dataset parser reading title, description, rights and contact from
    ctx.
    """
    title = tools.Try(ctx['title'])
    description = tools.Try(ctx['description'])

    # The access and usage rights are concatenated, then joined into one
    # value.
    rights = tools.Try(
        tools.Join(
            tools.Concat(tools.Try(ctx['access-rights']),
                         tools.Try(ctx['usage-rights']))))

    related_agents = tools.Map(tools.Delegate(Creator), tools.Try(ctx.contact))

    class Extra:
        # Raw record entries, each wrapped in Try; last-update also goes
        # through ParseDate.
        access_rights = tools.Try(ctx['access-rights'])
        usage_rights = tools.Try(ctx['usage-rights'])
        collection_statistics = tools.Try(ctx['collection-statistics'])
        management = tools.Try(ctx['management'])
        collection_type = tools.Try(ctx['collection-type'])
        last_update = tools.ParseDate(tools.Try(ctx['last-update']))
class ThroughSubjects(Parser):
    """Parser whose single field delegates the whole context to Subject."""
    subject = tools.Delegate(Subject, ctx)