예제 #1
0
def add_oa_links_in_references(text):
    """
    Add open-access links to the citation templates found in wikitext.

    Every template of ``text`` for which ``parse_citation_template`` returns
    a truthy reference is looked up with ``get_oa_link``.  When a link is
    found, the first entry of ``template_arg_mappings`` that extracts a value
    from the link decides what happens: if that parameter is already present
    the template is left untouched, otherwise the parameter is added.

    :param text: the wikitext to process
    :returns: a ``(new_text, changed_templates, stats)`` tuple.
        ``new_text`` is the wikitext after the additions.
        ``changed_templates`` is a list of ``(original_template, change)``
        pairs, one per citation template; ``change`` is ``None`` when no
        link was found, otherwise a dict mapping a parameter name (prefixed
        with ``new_`` when the parameter was already present) to a
        ``(value, link)`` pair.  The dict is empty when no mapping matched
        the link.
        ``stats`` is the dict of counters built below.
    """
    wikicode = mwparserfromhell.parse(text)
    changed_templates = []

    stats = {
        'nb_templates': 0,     # total number of templates processed
        'oa_found': 0,         # hits from the API
        'changed': 0,          # actual changes on the templates
        'already_present': 0,  # no change because already present
    }

    for template in wikicode.filter_templates():
        # Snapshot taken before the template is modified further down
        orig_template = deepcopy(template)
        reference = parse_citation_template(template)
        if not reference:
            continue

        stats['nb_templates'] += 1
        link = get_oa_link(reference)
        if not link:
            changed_templates.append((orig_template, None))
            continue

        # We found an OA link!
        stats['oa_found'] += 1
        change = {}

        # Only the first mapping that matches the link is acted upon
        for argmap in template_arg_mappings:
            # Did the link we have got match that argument place?
            match = argmap.extract(link)
            if not match:
                continue

            # If this parameter is already present in the template,
            # don't change anything
            if argmap.present(template):
                change['new_' + argmap.name] = (match, link)
                stats['already_present'] += 1
                break

            # If the parameter is not present yet, add it
            stats['changed'] += 1
            if argmap.is_id:
                # Identifier-style mappings are wrapped in their own
                # template and stored in the |id= parameter
                val = '{{%s|%s}}' % (argmap.name, match)
                template.add('id', val)
                change['id'] = (val, link)
            else:
                template.add(argmap.name, match)
                change[argmap.name] = (match, link)
            break

        changed_templates.append((orig_template, change))

    return unicode(wikicode), changed_templates, stats
def get_parsed_citations(content):
    """
    Collect the citation templates found in a piece of wikitext.

    :param content: the wikitext to scan
    :returns: a list of ``(citation, type_of_citation)`` pairs, one for each
        template for which ``parse_citation_template`` returns a truthy value
    """
    results = []
    for node in mwparserfromhell.parse(content).filter_templates():
        parsed = parse_citation_template(node)
        if not parsed:
            continue
        # Text before the first '|', lower-cased, with its first two
        # characters (presumably the opening '{{') dropped
        head = node.split('|')[0]
        results.append((parsed, head.lower()[2:]))
    return results
예제 #3
0
def get_generic_template(citation):
    """
    Parse a citation string into its generic template (wikiciteparser).

    :param: citation according to a particular format as described in const.py
    :returns: the result of ``parse_citation_template`` for the first
        template found in the citation, or a placeholder dict when no
        template is found or the parser returns ``None``
    """
    fallback = {'Title': 'Citation generic template not possible'}

    # Append a closing '}}' when check_if_balanced reports an imbalance
    source = citation if check_if_balanced(citation) else citation + '}}'

    # Convert the str into mwparser object and keep its templates
    templates = mwparserfromhell.parse(source).filter_templates()
    if not templates:
        return fallback

    result = parse_citation_template(templates[0])
    # In case the mwparser is not able to parse the citation template
    if result is None:
        return fallback
    return result
예제 #4
0
파일: main.py 프로젝트: dissemin/oabot
    def propose_change(self):
        """
        Fetches open urls for that template and proposes a change

        Nothing is returned; the outcome is stored on the instance.
        ``self.classification`` is set to:

        * ``'ignored'`` -- ``parse_citation_template`` gave nothing or the
          template name is in ``excluded_templates``;
        * ``'already_open'`` -- a mapping reports ``present_and_free``; its
          value is stored in ``self.conflicting_value``;
        * ``'not_found'`` -- ``get_oa_link`` returned a falsy value;
        * ``'already_present'`` -- the first mapping matching the link
          already has a value in the template;
        * ``'link_added'`` -- otherwise; ``self.proposed_change`` then holds
          ``'name=value'`` or ``'id={{name|value}}'``.

        When a link is found but no mapping extracts a value from it, the
        classification is left as it was (possibly
        ``'registration_subscription'``).  ``self.proposed_link``,
        ``self.proposed_link_policy`` and ``self.issn`` are set whenever a
        link is found.
        """
        reference = parse_citation_template(self.template)
        tpl_name = unicode(self.template.name).lower().strip()
        if not reference or tpl_name in excluded_templates:
            self.classification = 'ignored'
            return

        # One dot on stdout per template that is actually examined
        sys.stdout.write('.')
        sys.stdout.flush()

        # First check if there is already a link to a full text
        # in the citation.  The loop does not stop at the first hit, so
        # when several mappings qualify the last one is reported.
        already_oa_param = None
        already_oa_value = None
        for argmap in template_arg_mappings:
            if argmap.present_and_free(self.template):
                already_oa_param = argmap.name
                already_oa_value = argmap.get(self.template)

        # If so, we just skip it - no need for more free links
        if already_oa_param:
            self.classification = 'already_open'
            self.conflicting_value = already_oa_value
            return

        # --- Disabled for now ----
        # If the template is marked with |registration= or
        # |subscription= , let's assume that the editor tried to find
        # a better version themselves so it's not worth trying.
        # Each parameter is tested on its own: `(a or b) in [...]` would only
        # look at the first truthy value, so subscription=no would hide
        # registration=yes.
        affirmative = ('yes', 'y', 'true')
        if any(get_value(self.template, param) in affirmative
               for param in ('subscription', 'registration')):
            self.classification = 'registration_subscription'
            # return

        dissemin_paper_object = get_dissemin_paper(reference)

        # Otherwise, try to get a free link
        link = get_oa_link(dissemin_paper_object)
        if not link:
            self.classification = 'not_found'
            return

        # We found an OA link!
        self.proposed_link = link

        self.proposed_link_policy = get_paper_values(dissemin_paper_object, 'policy')
        self.issn = get_paper_values(dissemin_paper_object, 'issn')

        # Try to match it with an argument; only the first mapping that
        # extracts something from the link is considered.
        for argmap in template_arg_mappings:
            # Did the link we have got match that argument place?
            match = argmap.extract(link)
            if not match:
                continue

            # If this parameter is already present in the template:
            if argmap.get(self.template):
                #if argmap.custom_access:
                #    stats['changed'] += 1
                #    template.add(argmap.custom_access, 'free')
                #else:

                self.classification = 'already_present'
                # don't change anything
                break

            # If the parameter is not present yet, add it
            self.classification = 'link_added'

            if argmap.is_id:
                self.proposed_change = 'id={{%s|%s}}' % (argmap.name, match)
            else:
                self.proposed_change = '%s=%s' % (argmap.name, match)
            break
예제 #5
0
    def propose_change(self):
        """
        Fetches open urls for that template and proposes a change

        Nothing is returned; the outcome is stored on the instance.
        ``self.classification`` is set to:

        * ``'ignored'`` -- ``parse_citation_template`` gave nothing or the
          template name is in ``excluded_templates``;
        * ``'already_open'`` -- a mapping reports ``present_and_free``; its
          value is stored in ``self.conflicting_value``;
        * ``'not_found'`` -- ``get_oa_link`` returned a falsy value;
        * ``'already_present'`` -- the first mapping matching the link
          already has a value in the template;
        * ``'link_added'`` -- otherwise; ``self.proposed_change`` then holds
          ``'name=value'`` or ``'id={{name|value}}'``.

        When a link is found but no mapping extracts a value from it, the
        classification is left as it was (possibly
        ``'registration_subscription'``).  ``self.proposed_link``,
        ``self.proposed_link_policy`` and ``self.issn`` are set whenever a
        link is found.
        """
        reference = parse_citation_template(self.template)
        tpl_name = unicode(self.template.name).lower().strip()
        if not reference or tpl_name in excluded_templates:
            self.classification = 'ignored'
            return

        # One dot on stdout per template that is actually examined
        sys.stdout.write('.')
        sys.stdout.flush()

        # First check if there is already a link to a full text
        # in the citation.  The loop does not stop at the first hit, so
        # when several mappings qualify the last one is reported.
        already_oa_param = None
        already_oa_value = None
        for argmap in template_arg_mappings:
            if argmap.present_and_free(self.template):
                already_oa_param = argmap.name
                already_oa_value = argmap.get(self.template)

        # If so, we just skip it - no need for more free links
        if already_oa_param:
            self.classification = 'already_open'
            self.conflicting_value = already_oa_value
            return

        # --- Disabled for now ----
        # If the template is marked with |registration= or
        # |subscription= , let's assume that the editor tried to find
        # a better version themselves so it's not worth trying.
        # Each parameter is tested on its own: `(a or b) in [...]` would only
        # look at the first truthy value, so subscription=no would hide
        # registration=yes.
        affirmative = ('yes', 'y', 'true')
        if any(get_value(self.template, param) in affirmative
               for param in ('subscription', 'registration')):
            self.classification = 'registration_subscription'
            # return

        dissemin_paper_object = get_dissemin_paper(reference)

        # Otherwise, try to get a free link
        link = get_oa_link(dissemin_paper_object)
        if not link:
            self.classification = 'not_found'
            return

        # We found an OA link!
        self.proposed_link = link

        self.proposed_link_policy = get_paper_values(dissemin_paper_object,
                                                     'policy')
        self.issn = get_paper_values(dissemin_paper_object, 'issn')

        # Try to match it with an argument; only the first mapping that
        # extracts something from the link is considered.
        for argmap in template_arg_mappings:
            # Did the link we have got match that argument place?
            match = argmap.extract(link)
            if not match:
                continue

            # If this parameter is already present in the template:
            if argmap.get(self.template):
                #if argmap.custom_access:
                #    stats['changed'] += 1
                #    template.add(argmap.custom_access, 'free')
                #else:

                self.classification = 'already_present'
                # don't change anything
                break

            # If the parameter is not present yet, add it
            self.classification = 'link_added'

            if argmap.is_id:
                self.proposed_change = 'id={{%s|%s}}' % (argmap.name, match)
            else:
                self.proposed_change = '%s=%s' % (argmap.name, match)
            break
예제 #6
0
파일: main.py 프로젝트: ppeach-coder/oabot
    def propose_change(self, only_doi=False):
        """
        Fetches open urls for that template and proposes a change

        Nothing is returned; the outcome is stored on the instance
        (``classification``, ``conflicting_value``, ``proposed_link``,
        ``proposed_link_policy``, ``issn`` and ``proposed_change``).

        :param only_doi: when true, the Dissemin lookup is skipped (an empty
            dict is passed as the paper) and ``get_oa_link`` is called with
            ``only_unpaywall=True``

        ``self.classification`` ends up as:

        * ``'ignored'`` -- ``parse_citation_template`` gave nothing or the
          template name is in ``excluded_templates``;
        * ``'already_open'`` -- a mapping reports ``present_and_free``, or
          ``get_oa_link`` returned exactly ``False``; in the latter case, if
          the reference has a DOI, ``doi-access=free`` is proposed;
        * ``'not_found'`` -- ``get_oa_link`` returned another falsy value;
        * ``'already_present'`` -- the first mapping matching the link
          already has a value in the template (``hdl-access=free`` is
          proposed when that mapping is ``hdl``);
        * ``'link_added'`` -- otherwise.
        """
        reference = parse_citation_template(self.template)
        tpl_name = unicode(self.template.name).lower().strip()
        if not reference or tpl_name in excluded_templates:
            self.classification = 'ignored'
            return

        # One dot on stdout per template that is actually examined
        sys.stdout.write('.')
        sys.stdout.flush()

        # First check if there is already a link to a full text
        # in the citation.  The loop does not stop at the first hit, so
        # when several mappings qualify the last one is reported.
        already_oa_param = None
        already_oa_value = None
        for argmap in template_arg_mappings:
            if argmap.present_and_free(self.template):
                already_oa_param = argmap.name
                already_oa_value = argmap.get(self.template)

        # If so, we just skip it - no need for more free links
        if already_oa_param:
            self.classification = 'already_open'
            self.conflicting_value = already_oa_value
            return

        # --- Disabled for now ----
        # If the template is marked with |registration= or
        # |subscription= , let's assume that the editor tried to find
        # a better version themselves so it's not worth trying.
        # Each parameter is tested on its own: `(a or b) in [...]` would only
        # look at the first truthy value, so subscription=no would hide
        # registration=yes.
        affirmative = ('yes', 'y', 'true')
        if any(get_value(self.template, param) in affirmative
               for param in ('subscription', 'registration')):
            self.classification = 'registration_subscription'
            # return

        if only_doi:
            dissemin_paper_object = {}
        else:
            dissemin_paper_object = get_dissemin_paper(reference)

        # Otherwise, try to get a free link
        doi = reference.get('ID_list', {}).get('DOI')
        link = get_oa_link(paper=dissemin_paper_object,
                           doi=doi,
                           only_unpaywall=only_doi)
        # An explicit False (as opposed to any other falsy value) is treated
        # as "already open" rather than "not found".
        if link is False:
            self.classification = 'already_open'
            if doi:
                self.proposed_change = "doi-access=free"
                self.proposed_link = "https://doi.org/{}".format(doi)
            # TODO add the DOI suggested by Dissemin if missing. Needs some checks.
            # elif dissemin_paper_object.get('pdf_url') and 'doi.org' in dissemin_paper_object.get('pdf_url'):
            #    self.proposed_change = dissemin_paper_object.get('pdf_url')
            #    return
            return
        if not link:
            self.classification = 'not_found'
            return

        # We found an OA link!
        self.proposed_link = link
        # If the parameter is not present yet, add it
        self.classification = 'link_added'

        if dissemin_paper_object:
            self.proposed_link_policy = get_paper_values(
                dissemin_paper_object, 'policy')
            # TODO: fetch from Unpaywall?
            self.issn = get_paper_values(dissemin_paper_object, 'issn')

        # Try to match it with an argument; only the first mapping that
        # extracts something from the link is considered.
        for argmap in template_arg_mappings:
            # Did the link we have got match that argument place?
            match = argmap.extract(link)
            if not match:
                continue

            # If this parameter is already present in the template:
            if argmap.get(self.template):
                #if argmap.custom_access:
                #    stats['changed'] += 1
                #    template.add(argmap.custom_access, 'free')
                #else:

                self.classification = 'already_present'
                if argmap.name == 'hdl':
                    self.proposed_change = "hdl-access=free"
                # don't change anything else
                return

            if argmap.is_id:
                self.proposed_change = 'id={{%s|%s}}' % (argmap.name, match)
            else:
                self.proposed_change = '%s=%s' % (argmap.name, match)
                if argmap.name == 'hdl':
                    self.proposed_change += "|hdl-access=free"
            break

        # If we are going to add an URL, check it's not probably redundant
        if self.proposed_change.startswith('url='):
            hdl = get_value(self.template, 'hdl')
            if hdl and hdl in self.proposed_change:
                # Don't actually add the URL but mark the hdl as seemingly OA
                # and hope that the templates will later linkify it
                self.proposed_change = "hdl-access=free"