Пример #1
0
def generate_html(bib_csl_data, style_name='harvard1'):
    """Render every entry of a bibliography source as an HTML paragraph.

    Each key obtained by iterating ``bib_csl_data`` is registered and cited
    on its own, the bibliography is sorted, and every rendered entry is
    wrapped in ``<p>...</p>`` followed by a newline.

    :param bib_csl_data: non-empty bibliography source handed to
        ``CitationStylesBibliography``; iterating it must yield item keys.
    :param style_name: CSL style handed to ``CitationStylesStyle``.
    :returns: the concatenated HTML paragraphs as a single string.
    :raises AssertionError: if ``bib_csl_data`` is falsy.
    """
    assert bib_csl_data

    processor = CitationStylesBibliography(
        CitationStylesStyle(style_name, validate=False),
        bib_csl_data,
        formatter.html)

    citations = [Citation([CitationItem(key)]) for key in bib_csl_data]

    # First pass: make every citation known to the processor.
    for citation in citations:
        processor.register(citation)

    # Second pass: cite each one.
    # FIXME: Can this be avoided?
    for citation in citations:
        processor.cite(citation, _cite_warn)

    processor.sort()
    return ''.join(
        '<p>' + ''.join(entry) + '</p>\n'
        for entry in processor.bibliography())
Пример #2
0
def render_citation(node, style='apa'):
    """Return the plain-text bibliography entry for ``node``.

    The CSL record is ``node.csl`` for a ``Node`` and
    ``preprint_csl(node, node.node)`` for a ``PreprintService``; any other
    type raises ``ValueError``.  ``style`` names a style file below
    ``CITATION_STYLES_PATH``.  An empty unicode string is returned when the
    processor renders no entry.
    """
    if isinstance(node, Node):
        csl_record = node.csl
    elif isinstance(node, PreprintService):
        csl_record = preprint_csl(node, node.node)
    else:
        raise ValueError

    source = CiteProcJSON([csl_record])
    csl_style = CitationStylesStyle(
        os.path.join(CITATION_STYLES_PATH, style), validate=False)
    processor = CitationStylesBibliography(csl_style, source, formatter.plain)

    single = Citation([CitationItem(node._id)])
    processor.register(single)
    # cite() requires a callback for unknown keys; they are ignored here.
    processor.cite(single, lambda citation_item: None)

    entries = processor.bibliography()
    if len(entries):
        return unicode(entries[0])
    return unicode('')
Пример #3
0
def render_citation(node, style='apa'):
    """Given a node or preprint service, return its citation as text.

    Raises ``ValueError`` when ``node`` is neither a ``Node`` nor a
    ``PreprintService``.  Returns an empty unicode string when nothing was
    rendered.
    """
    def _ignore_missing(citation_item):
        """Callback for cite(): unknown keys are silently skipped."""
        return None

    if isinstance(node, Node):
        records = [node.csl]
    elif isinstance(node, PreprintService):
        records = [preprint_csl(node, node.node)]
    else:
        raise ValueError

    json_source = CiteProcJSON(records)
    style_file = os.path.join(CITATION_STYLES_PATH, style)
    loaded_style = CitationStylesStyle(style_file, validate=False)
    renderer = CitationStylesBibliography(
        loaded_style, json_source, formatter.plain)

    node_citation = Citation([CitationItem(node._id)])
    renderer.register(node_citation)
    renderer.cite(node_citation, _ignore_missing)

    rendered = renderer.bibliography()
    first_or_blank = rendered[0] if len(rendered) else ''
    return unicode(first_or_blank)
Пример #4
0
def parse_zotero_bibtex(items, citation_style_path):
	"""Render each entry of a BibTeX string as an HTML bibliography item.

	:param items: BibTeX source text; it is split into entries on ``@``.
	:param citation_style_path: CSL style handed to ``CitationStylesStyle``.
	:returns: a list of dicts with the keys ``key`` (BibTeX key),
		``bibtex`` (source text of the entry) and ``html`` (rendered entry).
		Rendering is best-effort: if citeproc fails, the error is reported
		through ``warning`` and on stderr, and the entries rendered so far
		are returned.
	"""
	# separate the bibtex items and extract the key
	key_pattern = re.compile(r'{([^,]+)')
	bibtexItems = []
	for item in items.split('@'):
		if len(item) <= 1:
			continue
		match = key_pattern.search(item)
		if match is None:
			# a fragment without "{key," (e.g. text in front of the first
			# entry) is not a BibTeX entry; skip it instead of crashing
			continue
		# make a tuple containing bibtex key and bibtex string
		# splitting removed the @ of each entry so we need to add it back to get valid bibtex
		bibtexItems.append((match.group(1), "@" + item))

	if not bibtexItems:
		# nothing to render, so citeproc does not need to be set up at all
		return []

	# citeproc_py expects a bibtex file for input so we cannot just pass the bibtex string,
	# so we need to write it into a virtual file using StringIO
	output = StringIO.StringIO()
	itemDicts = []
	try:
		output.write(items)
		output.seek(0)

		# initialize citeproc-py
		bib_source = BibTeX(output)
		bib_style = CitationStylesStyle(citation_style_path, validate=False)

		# parse each bibtex entry into a html formatted bibliography entry
		# instead of generating a bibliography of all entries at once, create a
		# bibliography for each of the single items to retain the mapping to
		# the corresponding bibtex string
		for (key, bibtex) in bibtexItems:
			bibliography = CitationStylesBibliography(bib_style, bib_source, formatter.html)

			citation = Citation([CitationItem(key)])
			bibliography.register(citation)
			bibliography.cite(citation, warning)

			itemDicts.append({
				'key': key,
				'bibtex': bibtex,
				# the rendered entry is split into separate tokens,
				# so join them back together
				'html': "".join(bibliography.bibliography()[0]),
			})
	except Exception:
		# best effort: report the failure, keep what was rendered so far
		warning("ERROR PARSING")
		traceback.print_exc(file=sys.stderr)
	finally:
		output.close()

	# returning here (not inside ``finally``) lets KeyboardInterrupt and
	# SystemExit propagate instead of being swallowed
	return itemDicts
Пример #5
0
def print_using_citeproc(csl_json, keys, style):
    """Print the HTML bibliography entries for ``keys`` using citeproc-py.

    ``csl_json`` is a single CSL-JSON record (it is wrapped in a list before
    being handed to ``CiteProcJSON``), ``keys`` are the reference keys to
    cite and ``style`` is the CSL style given to ``CitationStylesStyle``.

    The callback passed to ``cite()`` raises ``RuntimeError`` naming the
    offending key.
    """
    from citeproc import CitationStylesStyle, CitationStylesBibliography
    from citeproc import Citation, CitationItem
    from citeproc import formatter
    from citeproc.source.json import CiteProcJSON

    def fail_on_missing(citation_item):
        message = "Reference with key '{}' not found".format(citation_item.key)
        raise RuntimeError(message)

    source = CiteProcJSON([csl_json])
    processor = CitationStylesBibliography(
        CitationStylesStyle(style, validate=False), source, formatter.html)

    # Same sequence as the example in the citeproc repo: register all
    # citations, cite all of them, then render.
    citations = [Citation([CitationItem(key)]) for key in keys]
    for citation in citations:
        processor.register(citation)
    for citation in citations:
        processor.cite(citation, fail_on_missing)

    for entry in processor.bibliography():
        print(str(entry))
Пример #6
0
class CSLRenderer(object):
    """
        Wraps a citeproc-py bibliography (HTML formatter) for one document
    """
    def __init__(self, doc, style):
        """
            Builds the bibliography for `doc` with the CSL style `style`
            (lower-cased, looked up under CSL_PATH) and registers all of the
            document's citations
        """
        self.citations = {}
        self.doc = doc
        style_name = style.lower()

        references = CiteProcJSON(self.convertReferencesToJSON())
        csl_style = CitationStylesStyle(os.path.join(CSL_PATH, style_name), validate=False)

        self.bibliography = CitationStylesBibliography(csl_style, references, formatter.html)
        self.prepareCitations()

    def convertReferencesToJSON(self):
        """
            Converts the document's references into the list of dicts that
            is handed to CiteProcJSON
        """
        # (key in the document reference, key in the generated record)
        key_map = (
            ("id", "id"),
            ("title", "title"),
            ("authors", "author"),
            ("publisher", "publisher-name"),
            ("type", "type"),
            ("URL", "url"),
        )

        converted = []
        for ref in self.doc.references:
            record = {}
            for source_key, target_key in key_map:
                if source_key in ref:
                    record[target_key] = ref[source_key]
            # a reference without "year" gets the placeholder year "0"
            record["issued"] = {"date-parts": [(ref.get("year", "0"),)]}
            converted.append(record)
        return converted

    def prepareCitations(self):
        """
            Creates one single-item Citation per document citation, stores
            it under the citation's "id" and registers it
        """
        for cit in self.doc.citations:
            citation = Citation([CitationItem(cit["ref_id"])])
            self.citations[cit["id"]] = citation
            self.bibliography.register(citation)

    def getCitationText(self, cit):
        """
            Returns the result of cite() for the citation stored under
            cit["id"]; the missing-key callback does nothing
        """
        def ignore_missing(citation_item):
            return None

        return self.bibliography.cite(self.citations[cit["id"]], ignore_missing)

    def getBibliography(self):
        """
            Returns the formatted bibliography as a list of strings
        """
        return list(map(str, self.bibliography.bibliography()))
Пример #7
0
def render_citation(node, style='apa'):
    """Return the plain-text citation for ``node`` with common errors fixed.

    For a ``PreprintService`` the CSL record comes from
    ``preprint_csl(node, node.node)``; otherwise ``node.csl`` is used.
    ``clean_up_common_errors`` is applied to the text around the title when
    the title occurs exactly once, to the whole citation when the title does
    not occur, and not at all when the title occurs several times.
    """
    if isinstance(node, PreprintService):
        csl = preprint_csl(node, node.node)
        record = csl
    else:
        csl = None
        record = node.csl

    source = CiteProcJSON([record])
    csl_style = CitationStylesStyle(
        os.path.join(CITATION_STYLES_PATH, style), validate=False)
    processor = CitationStylesBibliography(csl_style, source, formatter.plain)

    single = Citation([CitationItem(node._id)])
    processor.register(single)
    # cite() requires a callback for unknown keys; they are ignored here.
    processor.cite(single, lambda citation_item: None)

    entries = processor.bibliography()
    cit = unicode(entries[0] if len(entries) else '')

    title_source = csl if csl else node.csl
    title = title_source['title']

    occurrences = cit.count(title)
    if occurrences == 1:
        # Leave the title itself untouched and clean only what surrounds it.
        before, _, after = cit.partition(title)
        cleaned_before = clean_up_common_errors(before)
        cleaned_after = clean_up_common_errors(after)
        cit = cleaned_before + title + cleaned_after
    elif occurrences == 0:
        cit = clean_up_common_errors(cit)

    return cit
Пример #8
0
def render_citation(node, style='apa'):
    """Return the plain-text bibliography entry built from ``node.csl``.

    ``style`` names a style file below ``CITATION_STYLES_PATH``.  An empty
    unicode string is returned when the processor renders no entry.
    """
    source = CiteProcJSON([node.csl])
    csl_style = CitationStylesStyle(
        os.path.join(CITATION_STYLES_PATH, style), validate=False)
    processor = CitationStylesBibliography(csl_style, source, formatter.plain)

    single = Citation([CitationItem(node._id)])
    processor.register(single)
    # cite() requires a callback for unknown keys; they are ignored here.
    processor.cite(single, lambda citation_item: None)

    entries = processor.bibliography()
    if len(entries):
        return unicode(entries[0])
    return unicode('')
Пример #9
0
def make_citation_by_string(publications):
    """Cite all ``publications`` together and return the rendered citation.

    The publications are rendered to BibTeX text through the
    ``semsite/export/publications.bib`` template, parsed, and every key of
    the parsed source is put into one ``Citation``.  The style is
    'gost-r-7-0-5-2008' with 'ru-ru' as second argument.  The return value
    is whatever ``CitationStylesBibliography.cite`` returns.
    """
    bibtex_text = render_to_string(
        'semsite/export/publications.bib', {'publications': publications})
    source = bibtex.BibTeX(io.StringIO(bibtex_text), encoding='utf-8')
    gost_style = CitationStylesStyle(
        'gost-r-7-0-5-2008', 'ru-ru', validate=False)
    processor = CitationStylesBibliography(gost_style, source)

    everything = Citation(list(map(CitationItem, source)))
    processor.register(everything)
    return processor.cite(everything, warn)
Пример #10
0
def get_bib():
    """Render ``notebooks/bibliography.bib`` with ``notebooks/springer.csl``.

    :returns: dict mapping every bibliography key to its HTML entry.  A
        leading ``<digits>.`` is stripped from each entry and the literal
        prefix ``'{}.&emsp;'`` is put in front, so every value still
        contains a ``{}`` placeholder.
    """
    bib_source = BibTeX('notebooks/bibliography.bib', encoding='utf8')
    bib_style = CitationStylesStyle('notebooks/springer.csl', validate=False)

    bibliography = CitationStylesBibliography(bib_style, bib_source,
                                              formatter.html)
    bib_cites = [Citation([CitationItem(item)]) for item in bib_source]

    for item in bib_cites:
        bibliography.register(item)
    for item in bib_cites:
        bibliography.cite(item, _cite_warn)

    # Render the bibliography once instead of once per key.
    rendered = bibliography.bibliography()
    # Raw string: "\d" in a normal string literal is an invalid escape.
    leading_number = re.compile(r"^\d+\.")

    bib_entries = dict()
    for key, entry in zip(bibliography.keys, rendered):
        # remove beginning digits and \. from bib entries
        bib_entries[key] = '{}.&emsp;' + leading_number.sub("", ''.join(entry))

    return bib_entries
Пример #11
0
def render_citation(node, style='apa'):
    """Return the citation for ``node``, re-assembled per known style.

    The CSL record is ``node.csl`` for a ``Node`` and
    ``preprint_csl(node, node.node)`` for a ``PreprintService``; any other
    type raises ``ValueError``.  For the styles 'apa',
    'modern-language-association' and 'chicago-author-date' the rendered
    entry is split into its tokens and selected tokens are joined again
    (for 'apa' the selection depends on whether the record has a 'DOI').
    Any other style returns the entry converted with ``unicode``.  An empty
    string is returned when nothing was rendered.
    """
    if isinstance(node, Node):
        data = [node.csl]
    elif isinstance(node, PreprintService):
        data = [preprint_csl(node, node.node)]
    else:
        raise ValueError

    source = CiteProcJSON(data)
    csl_style = CitationStylesStyle(
        os.path.join(CITATION_STYLES_PATH, style), validate=False)
    processor = CitationStylesBibliography(csl_style, source, formatter.plain)

    single = Citation([CitationItem(node._id)])
    processor.register(single)
    # cite() requires a callback for unknown keys; they are ignored here.
    processor.cite(single, lambda citation_item: None)

    bib = processor.bibliography()
    if not len(bib):
        return ''

    doi = data[0].get('DOI')
    entry = bib[0]

    if style == 'apa':
        tokens = list(entry)
        # The first token loses its last two characters.
        head = [tokens[0][:-2]]
        if doi:
            return ''.join(head + tokens[1:13])
        return ''.join(head + tokens[1:12] + tokens[13:])

    if style == 'modern-language-association':
        tokens = list(entry)
        return ''.join(tokens[:4] + ['.'] + tokens[4:5] + tokens[6:-2])

    if style == 'chicago-author-date':
        tokens = list(entry)
        return ''.join(tokens[0:3] + ['.'] + tokens[3:4] + [' '] + tokens[5:])

    return unicode(entry)
Пример #12
0
def render_citation(node, style='apa'):
    """Given a node, return its citation rendered as plain text.

    The record ``node.csl`` is rendered with the style file ``style`` found
    below ``CITATION_STYLES_PATH``.  Returns an empty unicode string when
    the bibliography comes back empty.
    """
    def _ignore_missing(citation_item):
        """Callback for cite(): unknown keys are silently skipped."""
        return None

    json_source = CiteProcJSON([node.csl])
    style_file = os.path.join(CITATION_STYLES_PATH, style)
    loaded_style = CitationStylesStyle(style_file, validate=False)
    renderer = CitationStylesBibliography(
        loaded_style, json_source, formatter.plain)

    node_citation = Citation([CitationItem(node._id)])
    renderer.register(node_citation)
    renderer.cite(node_citation, _ignore_missing)

    rendered = renderer.bibliography()
    first_or_blank = rendered[0] if len(rendered) else ''
    return unicode(first_or_blank)
Пример #13
0
class Bibliography:
    """Plain-text citeproc-py bibliography built from a BibTeX file.

    :param bib_file: BibTeX input handed to ``BibTeX``.
    :param csl_file: CSL style handed to ``CitationStylesStyle``.
    """

    def __init__(self, bib_file, csl_file):
        self.bib_file = bib_file
        self.csl_file = csl_file
        self.bibtex = BibTeX(bib_file)
        self.style = CitationStylesStyle(csl_file, validate=False)
        # Stored under a private name: an instance attribute called
        # ``bibliography`` would shadow the ``bibliography()`` method.
        self._processor = CitationStylesBibliography(
            self.style, self.bibtex, formatter.plain)

    def citation(self, pandocname):
        """Register a citation and return its rendered text.

        NOTE(review): ``pandocname`` is not used; the cited key is the fixed
        string 'whole-collection'. Confirm whether the key should be taken
        from ``pandocname`` instead.
        """
        c = Citation([CitationItem('whole-collection')])
        self._processor.register(c)
        # cite() takes a callback for keys missing from the source;
        # such keys are ignored here.
        return self._processor.cite(c, lambda citation_item: None)

    def bibliography(self):
        """Return the rendered bibliography entries as a list of strings."""
        return [str(item) for item in self._processor.bibliography()]
Пример #14
0
def bib(style_path,
        ds,
        return_cites_and_keys = False,
        formatter = "chocolate",
        dumb_quotes = True,
          # Turning this off won't educate any straight quotes in
          # the data, but leaving it on will stupefy all the
          # smart quotes in the output.
        apa_tweaks = True,
        # The below options are ignored unless apa_tweaks is on.
        always_include_issue = False,
        include_isbn = False,
        url_after_doi = False,
        publisher_website = True,
        abbreviate_given_names = True):
    """Render the records in `ds` as a formatted bibliography.

    Arguments:
      style_path -- passed to get_style() to obtain the CSL style.
      ds -- sequence of dict records (NOTE(review): presumably CSL-JSON
        items; confirm against callers). It is deep-copied and only the
        copy is modified.
      return_cites_and_keys -- if true, return the tuple
        (rendered citations, bibliography keys, bibliography entries)
        instead of only the bibliography entries.
      formatter -- a formatter object, or a string that is looked up in
        formatter_from_name.
      dumb_quotes -- if true, curly quotes in the output are replaced by
        straight ones.
      apa_tweaks -- if true, the adjustments guarded by it below are
        applied; it is also passed to get_style().
      always_include_issue -- if false, delf(d, 'issue') is called for
        'article-journal' records.
      include_isbn, url_after_doi -- only passed on to get_style().
      publisher_website -- if true, the URL of a 'report' record is
        rewritten to "<publisher> website: <URL>" and the publisher is
        removed from the record.
      abbreviate_given_names -- passed to get_style(); also enables the
        removal of hyphens before lowercase letters in given names.

    Returns the sequence produced by bibliography.bibliography() with each
    entry replaced by its post-processed string, or the tuple described
    under return_cites_and_keys.

    Raises ValueError if `formatter` is a string that is not a key of
    formatter_from_name.
    """

    if isinstance(formatter, str):
        try:             formatter = formatter_from_name[formatter]
        except KeyError: raise ValueError('Unknown formatter "{}"'.format(formatter))        

    style = get_style(style_path, apa_tweaks,
        include_isbn, url_after_doi, abbreviate_given_names)

    # Work on a copy; the records are modified below.
    ds = deepcopy(ds)
    if apa_tweaks:
    # Distinguish entries that would have identical authors and years
    # by adding suffixes to the years.
        # Group works by author and year.
        ay = defaultdict(list)
        for d in ds:
            k = repr(d.get('author') or d.get('editor'))  + '/' + str(d['issued']['date-parts'][0][0])
            # Identity check: the same object is not added to a group twice.
            if not any(d is v for v in ay[k]):
                ay[k].append(d)
        # If any group has more than one element, add suffixes.
        for v in ay.values():
            if len(v) > 1:
                for i, d in enumerate(sorted(v, key = title_sort_key)):
                   d['year_suffix'] = ascii_lowercase[i]
    for d in ds:
        # Records without an id get a random one; the id is used as the
        # citation key further down.
        if 'id' not in d:
            d['id'] = str(random())
        # Drop fields whose value is None.
        for k in list(d.keys()):
            if d[k] is None: del d[k]
        if apa_tweaks:
            # By default, don't include the issue number for
            # journal articles.
            if not always_include_issue and d['type'] == 'article-journal':
                delf(d, 'issue')
            # Use the weird "Retrieved from Dewey, Cheatem, &
            # Howe website: http://example.com" format prescribed
            # for reports.
            if publisher_website and d['type'] == 'report' and 'publisher' in d and 'URL' in d:
                d['URL'] = '{} website: {}'.format(
                    d.pop('publisher'), d['URL'])
            # Add structure words for presentations and include
            # the event place.
            if d['type'] == 'speech' and d['genre'] == 'paper':
                d['event'] = 'meeting of the {}, {}'.format(
                    d.pop('publisher'), d['event-place'])
            if d['type'] == 'speech' and d['genre'] == 'video':
                d['medium'] = 'Video file'
                del d['genre']
            # When abbreviating given names, remove hyphens
            # preceding lowercase letters. Otherwise, weird
            # stuff happens.
            if abbreviate_given_names and 'author' in d:
               for a in d['author']:
                   if 'given' in a:
                       a['given'] = sub(
                           '-(.)',
                           lambda mo:
                               ("" if mo.group(1).islower() else "-") +
                               mo.group(1),
                           a['given'])
            # Abbreviate a long list of authors with an ellipsis
            # and the final author.
            if 'author' in d and len(d['author']) > 7:
                # The placeholder family name is swapped for a real
                # ellipsis after rendering.
                d['author'] = (
                    d['author'][0:6] +
                    [{'given': '', 'family': '⣥<ellipsis>⣥'}] +
                    d['author'][-1:])

    bibliography = CitationStylesBibliography(
        style,
        {ref.key: ref for ref in parse_references(ds)},
        formatter)
    # One single-item citation per record, registered in input order.
    cites = [ Citation([CitationItem(d['id'])]) for d in ds ]
    for c in cites: bibliography.register(c)
    def sort_key_f(item):
        # Sort key: (author or editor names without the ellipsis
        # placeholder, year, title sort key, first page or '').
        ref = item.reference
        names = [(name['family'].lower(), name['given'][0].lower() if 'given' in name else '')
            for name in ref.get('author') or ref.get('editor')
            if name.family != '⣥<ellipsis>⣥']
        return (names, ref['issued']['year'],
            title_sort_key(ref),
            ref['page']['first'] if 'page' in ref else '')
    if len(ds) > 1:
        # Sort the bibliography
        # bibliography.sort()   # Doesn't appear to handle leading "the"s correctly.
        bibliography.items = sorted(bibliography.items, key = sort_key_f)
        bibliography.keys = [item.key for item in bibliography.items]
    bibl = bibliography.bibliography()

    # Post-process every rendered entry and store the result back in bibl.
    for i, s in enumerate(bibl):
        s = ''.join(s)
        # Fix spacing and punctuation issues.
        s = s.replace('  ', ' ')
        # Drop a period that directly follows ., !, ? or an ellipsis.
        s = sub(r'([.!?…])\.', r'\1', s)
        if dumb_quotes:
            s = s.replace('‘', "'").replace('’', "'").replace('“', '"').replace('”', '"')
        if apa_tweaks:
            if formatter is citeproc.formatter.html or formatter is chocolate:
                # Italicize the stuff between a journal name and a volume
                # number.
                s = sub(r'</i>, <i>(\d)', r', \1', s)
            # Make "p." into "pp." when more than one page is cited.
            s = sub(r'(\W)p\. (\S+[,–])', r'\1pp. \2', s)
            # Replace the ellipsis placeholder.
            s = s.replace('⣥<ellipsis>⣥, ., &', '…')
        bibl[i] = s

    if return_cites_and_keys:
        # The callback for keys missing from the bibliography does nothing.
        fcites = [bibliography.cite(c, lambda x: None) for c in cites]
        return (fcites, bibliography.keys, bibl)
    else:
        return bibl
Пример #15
0

# In the second pass, CitationStylesBibliography can generate citations.
# CitationStylesBibliography.cite() requires a callback function to be passed
# along to be called in case a CitationItem's key is not present in the
# bibliography.

def warn(citation_item):
    """Print a warning naming the key of ``citation_item``."""
    message = "WARNING: Reference with key '{}' not found in the bibliography."
    print(message.format(citation_item.key))


for heading_line in ('Citations', '---------'):
    print(heading_line)

# Render the five citations in order, reporting unknown keys through warn().
for citation in (citation1, citation2, citation3, citation4, citation5):
    print(bibliography.cite(citation, warn))


# And finally, the bibliography can be rendered.

for heading_line in ('', 'Bibliography', '------------'):
    print(heading_line)

for item in bibliography.bibliography():
    print(str(item))
Пример #16
0

# In the second pass, CitationStylesBibliography can generate citations.
# CitationStylesBibliography.cite() requires a callback function to be passed
# along to be called in case a CitationItem's key is not present in the
# bibliography.

def warn(citation_item):
    """Print a warning that the item's key was not found."""
    missing_key = citation_item.key
    print("WARNING: Reference with key '{}' not found in the bibliography."
          .format(missing_key))


for heading_line in ('Citations', '---------'):
    print(heading_line)

# Render the five citations in order, reporting unknown keys through warn().
for citation in (citation1, citation2, citation3, citation4, citation5):
    print(bibliography.cite(citation, warn))


# And finally, the bibliography can be rendered.

for heading_line in ('', 'Bibliography', '------------'):
    print(heading_line)

for item in bibliography.bibliography():
    print(str(item))
    # * a formatter (plain, html, or you can write a custom formatter)

    bibliography = CitationStylesBibliography(bib_style, bib_source,
                                              formatter.html)
    ######################################################################################################
    ######################################################################################################
    #######     docx document object
    doc = docx.Document()
    doc.add_heading("References", 0)
    print("References")
    for item in bib_source:

        citation = Citation([CitationItem(item)])

        bibliography.register(citation)
        item_string = bibliography.cite(citation, warn)

    html_helper = HTMLHelper()
    bibliography.sort()
    for item in bibliography.bibliography():
        ######################################################################################################
        ######################################################################################################
        #######     take out extra characters in citation, that are artifacts of the citeproc citation
        #######     creation process with some of our bib records
        #######
        #######     Also make the editor label plural if there are multiple ("eds.")
        item = str(item)
        item = item.replace(", n.d..", "")
        item = item.replace(',,', ',')
        item = item.replace('..', '.')
        item = re.sub(r'([^<]+?and[^<]+?)(ed.)(\s+<i>)', r'\1eds.\3', item)
Пример #18
0
class ProcessorTest(object):
    """Runs one citeproc test fixture that is stored as a JSON file.

    The fixture supplies the CSL style (``csl``), the reference data
    (``input``), the expected output (``result``), the ``mode`` and the
    ``citation_items`` / ``citations`` / ``bibentries`` sections.
    """

    def __init__(self, filename):
        """Load the fixture and set up style, references and bibliography."""
        with open(filename, encoding='UTF-8') as fixture_file:
            self.json_data = json.load(fixture_file)

        data = self.json_data
        encoded_csl = utf_8_encode(data['csl'])[0]
        self.style = CitationStylesStyle(io.BytesIO(encoded_csl))
        self._fix_input(data['input'])
        self.references = [ref['id'] for ref in data['input']]
        self.references_dict = CiteProcJSON(data['input'])
        self.bibliography = CitationStylesBibliography(self.style,
                                                       self.references_dict)
        self.expected = data['result'].splitlines()

    @staticmethod
    def _fix_input(input_data):
        """Fill in defaults in place: the position as id, 'book' as type."""
        for position, ref in enumerate(input_data):
            ref.setdefault('id', position)
            ref.setdefault('type', 'book')

    def execute(self):
        """Build the citations, register them and return the rendered results.

        The citations come from the first non-empty section among
        ``citation_items``, ``citations`` and ``bibentries``; without any of
        them a single citation covering all references is used.
        """
        data = self.json_data

        if data['citation_items']:
            citations = [self.parse_citation(entry)
                         for entry in data['citation_items']]
        elif data['citations']:
            citations = []
            for wrapped in data['citations']:
                spec = wrapped[0]
                citation = Citation([self.parse_citation_item(raw_item)
                                     for raw_item in spec['citationItems']])
                citation.key = spec['citationID']
                citation.note_index = spec['properties']['noteIndex']
                citations.append(citation)
        elif data['bibentries']:
            # Only the last list of bibliography entries is used.
            citations = [Citation([self.parse_citation_item({'id': entry})
                                   for entry in data['bibentries'][-1]])]
        else:
            citations = [Citation([self.parse_citation_item({'id': ref})
                                   for ref in self.references])]

        for citation in citations:
            self.bibliography.register(citation)

        if self.style.has_bibliography():
            self.bibliography.sort()

        def ignore_missing(citation_item):
            """Callback passed to cite(); does nothing."""
            return None

        results = []
        mode = data['mode']
        if mode == 'citation':
            if data['citations']:
                last_index = len(citations) - 1
                for index, citation in enumerate(citations):
                    # The last citation is marked '>>', the others '..'.
                    marker = '>>' if index == last_index else '..'
                    prefix = '{}[{}] '.format(marker, index)
                    results.append(
                        prefix +
                        self.bibliography.cite(citation, ignore_missing))
            else:
                for citation in citations:
                    results.append(
                        self.bibliography.cite(citation, ignore_missing))
        elif mode in ('bibliography', 'bibliography-nosort'):
            results.append(self.bibliography.bibliography())

        return results

    def parse_citation(self, citation_data):
        """Return a Citation made of the parsed items in ``citation_data``."""
        return Citation([self.parse_citation_item(raw_item)
                         for raw_item in citation_data])

    def parse_citation_item(self, citation_item_data):
        """Turn one citation item dict into a CitationItem.

        ``id`` becomes the reference key (as a string), ``locator`` becomes a
        Locator labelled with ``label`` (or 'page'), and every other entry is
        passed on as a keyword option with '-' replaced by '_'.
        """
        options = {}
        for key, value in citation_item_data.items():
            python_key = key.replace('-', '_')
            if python_key == 'id':
                reference_key = str(value)
            elif python_key == 'locator':
                try:
                    locator = Locator(citation_item_data['label'], value)
                except KeyError:
                    # some tests don't specify the label
                    locator = Locator('page', value)
                options['locator'] = locator
            elif python_key != 'label':
                options[python_key] = value

        return CitationItem(reference_key, **options)
Пример #19
0
class CSLRenderer(object):
    """
        Renders a document's citations and bibliography through citeproc-py
    """
    def __init__(self, doc, style):
        """
            Sets up the HTML bibliography for `doc`; `style` is lower-cased
            and resolved against CSL_PATH. All citations of the document are
            registered right away
        """
        self.citations = {}
        self.doc = doc
        style_file = style.lower()

        json_source = CiteProcJSON(self.convertReferencesToJSON())
        loaded_style = CitationStylesStyle(os.path.join(CSL_PATH, style_file),
                                           validate=False)

        self.bibliography = CitationStylesBibliography(loaded_style,
                                                       json_source,
                                                       formatter.html)
        self.prepareCitations()

    def convertReferencesToJSON(self):
        """
            Builds, for every reference of the document, the dict that is
            handed to CiteProcJSON and returns them as a list
        """
        # document key -> key in the generated record
        renamed_keys = [
            ("id", "id"),
            ("title", "title"),
            ("authors", "author"),
            ("publisher", "publisher-name"),
            ("type", "type"),
            ("URL", "url"),
        ]

        def convert(ref):
            record = {}
            for doc_key, record_key in renamed_keys:
                if doc_key in ref:
                    record[record_key] = ref[doc_key]
            # "0" stands in for a missing year
            record["issued"] = {"date-parts": [(ref.get("year", "0"), )]}
            return record

        return [convert(ref) for ref in self.doc.references]

    def prepareCitations(self):
        """
            Registers one single-item Citation for each citation of the
            document and remembers it under the citation's "id"
        """
        for cit in self.doc.citations:
            new_citation = Citation([CitationItem(cit["ref_id"])])
            self.citations[cit["id"]] = new_citation
            self.bibliography.register(new_citation)

    def getCitationText(self, cit):
        """
            Returns what cite() produces for the citation remembered under
            cit["id"]; the missing-key callback does nothing
        """
        def skip_missing(citation_item):
            return None

        registered = self.citations[cit["id"]]
        return self.bibliography.cite(registered, skip_missing)

    def getBibliography(self):
        """
            Returns the formatted bibliography as a list of strings
        """
        entries = []
        for item in self.bibliography.bibliography():
            entries.append(str(item))
        return entries
Пример #20
0
def render_bibliography(docs=None, format='html', locale='', style='', commit_link=False, commit_system=''):
    """Render CSL-JSON records as one bibliography string.

    :param docs: list of CSL-JSON dicts; ``None`` or an empty list gives ''.
    :param format: 'html' wraps the output in ``csl-bib-body`` /
        ``csl-entry`` divs and turns URLs inside the entries into links;
        with any other value the entries are concatenated without wrapping.
    :param locale: key looked up in the ``primary-dialects`` table of the
        locales file to select the CSL locale file.
    :param style: CSL style file name below ``<CSL_DATA_DIR>/csl``.
    :param commit_link: if true, HTML entries start with a marker icon and,
        together with ``commit_system``, end with a "Use this Record" link.
    :param commit_system: source system used in the link; 'crossref' links
        by DOI, every other value by ``source`` and ``id``.
    :returns: the rendered bibliography as a string.
    """
    if docs is None:
        docs = []
    if len(docs) == 0:
        return ''

    is_html = format == 'html'

    with open(secrets.CITEPROC_LOCALES_FILE) as data_file:
        locales = json.load(data_file)

    bib_source = CiteProcJSON(docs)
    # load a CSL style (from the current directory)
    locale = '%s/csl-locales/locales-%s' % (secrets.CSL_DATA_DIR, locales.get('primary-dialects').get(locale))
    bib_style = CitationStylesStyle('%s/csl/%s' % (secrets.CSL_DATA_DIR, style),
                                    locale=locale,
                                    validate=False)
    # Create the citeproc-py bibliography, passing it the:
    # * CitationStylesStyle,
    # * BibliographySource (CiteProcJSON in this case), and
    # * a formatter (plain, html, or you can write a custom formatter)
    bibliography = CitationStylesBibliography(bib_style, bib_source, formatter.html)

    def warn(citation_item):
        logging.warning(
            "WARNING: Reference with key '{}' not found in the bibliography.".format(citation_item.key)
        )

    # register and cite one citation per record
    for doc in docs:
        citation = Citation([CitationItem(doc.get('id'))])
        bibliography.register(citation)
        bibliography.cite(citation, warn)

    # And finally, the bibliography can be rendered.
    parts = []
    if is_html:
        parts.append('<div class="csl-bib-body">')

    # NOTE(review): docs[idx] below assumes the rendered entries come back
    # in the same order as ``docs`` -- confirm for sorting styles.
    for idx, item in enumerate(bibliography.bibliography()):
        if is_html:
            parts.append('<div class="csl-entry">')
            if commit_link:
                parts.append('<span class="glyphicon glyphicon-minus" aria-hidden="true"></span> ')

            # Link every distinct URL exactly once: replace() already
            # handles all occurrences, and replacing the same URL again
            # would wrap the anchors inserted by the first pass.
            linked_urls = []
            for url in re.findall(urlmarker.URL_REGEX, str(item)):
                if url in linked_urls:
                    continue
                linked_urls.append(url)
                item = item.replace(url, '<a href="%s">%s</a>' % (url, url))

        parts.append(str(item))

        if commit_link and commit_system:
            if commit_system == 'crossref':
                parts.append(' <span class="glyphicon glyphicon-transfer" aria-hidden="true"></span> <a href="%s?doi=%s">%s</a>' % (url_for("new_by_identifiers"), docs[idx].get('id'), lazy_gettext('Use this Record')))
            else:
                parts.append(' <span class="glyphicon glyphicon-transfer" aria-hidden="true"></span> <a href="%s?source=%s&id=%s">%s</a>' % (url_for("new_by_identifiers"), commit_system, docs[idx].get('id'), lazy_gettext('Use this Record')))

        if is_html:
            parts.append('</div>')

    if is_html:
        parts.append('</div>')

    return ''.join(parts)
Пример #21
0
class CSL:
    """Format records through citeproc-py and post-process them for export.

    The records are rendered with citeproc-py's HTML formatter. ``get`` then
    either returns that output directly or splits each citation and
    bibliography entry into author / year / rest and re-assembles the pieces
    with a per-style ``\\bibitem`` template.
    """

    # Applied to a citation whose first and last characters were removed:
    # group 1 is everything before the last four-digit run, group 2 is the
    # four digits (taken as the year).
    REGEX_TOKENIZE_CITA = re.compile(r'^(.*)\(?(\d{4})\)?')
    # Applied to a bibliography entry: group 1 is the shortest leading text,
    # group 2 starts at an optional backslash, optional whitespace and a digit.
    REGEX_TOKENIZE_BIBLIO = re.compile(r'^(.*?)(\\?\s*\d+.*)')

    def __init__(self,
                 for_cls,
                 csl_style,
                 export_format=adsFormatter.unicode,
                 journal_format=adsJournalFormat.default):
        """
        Build the citeproc-py bibliography and register one citation per record.

        :param for_cls: input data for this class; an iterable of CSL-JSON
            style dicts (each needs an ``id``; ``locator`` carries the bibcode).
            Titles may be rewritten in place depending on ``csl_style``.
        :param csl_style: export journal style (name of a file in ``cslstyles``)
        :param export_format: export format
        :param journal_format: how journal names are written for the
            aastex/aasj/aspc styles (macro, abbreviated or full)
        """
        self.for_cls = for_cls
        self.csl_style = csl_style
        self.export_format = export_format
        self.journal_format = journal_format
        self.citation_item = []
        self.bibcode_list = []

        # Must run before the data is handed to CiteProcJSON below.
        self.__update_title()

        # Process the JSON data to generate a citaproc-py BibliographySource.
        bib_source = CiteProcJSON(self.for_cls)

        csl_style_fullpath = os.path.realpath(__file__ + "/../../cslstyles")

        # load a CSL style (from the cslstyles directory resolved above)
        bib_style = CitationStylesStyle(csl_style_fullpath + '/' + csl_style,
                                        validate=False)

        # Create the citaproc-py bibliography, passing it the:
        # * CitationStylesStyle,
        # * BibliographySource (CiteProcJSON in this case), and
        # * a formatter (plain, html, or you can write a custom formatter)
        # we are going to have CSL format everything using html and then format it as we need to match the
        # classic output
        self.bibliography = CitationStylesBibliography(bib_style, bib_source,
                                                       formatter.html)

        # Processing citations in a document needs to be done in two passes as for some
        # CSL styles, a citation can depend on the order of citations in the
        # bibliography and thus on citations following the current one.
        # For this reason, we first need to register all citations with the
        # CitationStylesBibliography.
        for item in self.for_cls:
            citation = Citation([CitationItem(item['id'])])
            self.citation_item.append(citation)
            self.bibliography.register(citation)
            # this is actually a bibcode that was passed in, but we have to use
            # one of CSLs predefined ids
            self.bibcode_list.append(''.join(item.get('locator', '')))

    @staticmethod
    def _ignore_missing_reference(citation_item):
        """No-op callback handed to ``CitationStylesBibliography.cite``.

        :param citation_item: the citation item citeproc-py could not resolve
        :return: None
        """
        return None

    def __update_title(self):
        """
        Update the container-title if needed for the specific style
        also apply latex encoding if needed for both title and container-title

        The records in ``self.for_cls`` are modified in place. Styles not
        listed below are left untouched.

        :return:
        """
        style = self.csl_style
        # for mnras we need abbreviation of the journal names
        # available from adsutils
        if style == 'mnras':
            for data in self.for_cls:
                data['container-title'] = Format(None).get_pub_abbrev(
                    data['bibstem'])
                data['title'] = encode_laTex(data['title'])
        elif style in ('aastex', 'aasj', 'aspc'):
            # use macro (default)
            if (self.journal_format == adsJournalFormat.macro
                    or self.journal_format == adsJournalFormat.default):
                journal_macros = {
                    k: v for k, v in
                    current_app.config['EXPORT_SERVICE_AASTEX_JOURNAL_MACRO']
                }
                for data in self.for_cls:
                    # fall back to the encoded journal name when no macro exists
                    data['container-title'] = journal_macros.get(
                        Format(None).get_bibstem(data['bibstem']),
                        encode_laTex(data['container-title']))
                    data['title'] = encode_laTex(data['title'])
            elif self.journal_format == adsJournalFormat.abbreviated:
                for data in self.for_cls:
                    data['container-title'] = Format(None).get_pub_abbrev(
                        data['bibstem'])
                    data['title'] = encode_laTex(data['title'])
            elif self.journal_format == adsJournalFormat.full:
                for data in self.for_cls:
                    data['container-title'] = encode_laTex(
                        data['container-title'])
                    data['title'] = encode_laTex(data['title'])
        # for SoPh we use journal abbreviation for some special journals only
        elif style == 'soph':
            journal_abbrevation = current_app.config[
                'EXPORT_SERVICE_SOPH_JOURNAL_ABBREVIATION']
            for data in self.for_cls:
                data['container-title'] = journal_abbrevation.get(
                    Format(None).get_bibstem(data['bibstem']),
                    encode_laTex(data['container-title']))
                data['title'] = encode_laTex(data['title'])
        # for the rest just run title and container-title through latex encoding
        elif style in ('icarus', 'apsj'):
            for data in self.for_cls:
                data['container-title'] = encode_laTex(data['container-title'])
                data['title'] = encode_laTex(data['title'])

    def __update_author_etal(self, author, the_rest, bibcode):
        """
        Rewrite the "et al." part of a bibliography author string per style.

        :param author: author part of the bibliography entry
        :param the_rest: remainder of the bibliography entry
        :param bibcode: bibcode (``locator``) of the record being formatted
        :return: tuple of (author, the_rest), possibly modified
        """
        # for icarus we need to add # authors beside the first author
        # in case more authors were available CSL would turn it into first author name et. al.
        # hence, from CSL we get something like Siltala, J. et al.\
        # but we need to turn it to Siltala, J., and 12 colleagues
        if self.csl_style == 'icarus':
            if ' et al.' in author:
                for data in self.for_cls:
                    # 'locator' is optional (see __init__), so do not index it
                    if data.get('locator') == bibcode:
                        author = author.replace(
                            ' et al.', ', and {} colleagues'.format(
                                len(data['author']) - 1))
                        the_rest = the_rest.lstrip('\\')
                        # nothing is left to replace after the first match
                        break
        elif self.csl_style == 'soph':
            if 'et al.' in author:
                author = author.replace('et al.', 'and, ...')
        return author, the_rest

    def __update_author_etal_add_emph(self, author):
        """
        Wrap "et al." in ``\\emph{}`` for the soph style with latex output.

        :param author: author part of the citation
        :return: the author string, modified only for soph + latex
        """
        # for Solar Physics we need to put et al. in \emph, which was not do able on the CLS
        # side, and hence we need to add it here
        # but note that it only applies if the output format is in latex format
        if self.csl_style == 'soph':
            if ('et al.' in author) and (self.export_format
                                         == adsFormatter.latex):
                return author.replace('et al.', '\\emph{et al.}')
        return author

    def __tokenize_cita(self, cita):
        """
        Split a citation into its author and year parts.

        :param cita: citation
        :return: tuple of (author, year)
        :raises IndexError: if the citation contains no four-digit year
        """
        # cita (citation) is author(s) followed by year inside a parentheses
        # first remove the parentheses and then split the author and year fields
        matches = self.REGEX_TOKENIZE_CITA.findall(cita[1:-1])
        cita_author, cita_year = matches[0]
        return cita_author.strip(' ').rstrip('('), cita_year.strip(' ')

    def __tokenize_biblio(self, biblio):
        """
        Split a bibliography entry into its author part and the remainder.

        :param biblio: bibliography
        :return: tuple of (author, rest)
        :raises IndexError: if the entry does not match REGEX_TOKENIZE_BIBLIO
        """
        # split the author and rest of biblio
        matches = self.REGEX_TOKENIZE_BIBLIO.findall(biblio)
        biblio_author, biblio_rest = matches[0]
        return biblio_author, biblio_rest

    def __format_output(self, cita, biblio, bibcode, index):
        """
        Assemble one output record from a rendered citation and bibliography.

        :param cita: rendered citation
        :param biblio: rendered bibliography entry
        :param bibcode: bibcode of the record (blanked for apsj)
        :param index: position of the record; currently not used
        :return: the formatted record
        :raises KeyError: if ``self.csl_style`` has no output template
        """
        # apsj is a special case, display biblio as csl has format, just adjust translate characters for LaTex
        if self.csl_style == 'apsj':
            cita_author, cita_year = '', ''
            biblio_author = cita
            biblio_rest = biblio.replace(cita, '')
            # do not need this, but since we are sending the format all the fields, empty bibcode
            bibcode = ''
        else:
            cita_author, cita_year = self.__tokenize_cita(cita)
            biblio_author, biblio_rest = self.__tokenize_biblio(biblio)

        # encode author if latex format
        if self.export_format == adsFormatter.latex:
            cita_author = encode_laTex_author(cita_author)
            biblio_author = encode_laTex_author(biblio_author)

        # some adjustments to the what is returned from CSL that we can not do with CSL
        cita_author = html_to_laTex(
            self.__update_author_etal_add_emph(cita_author))
        biblio_author, biblio_rest = self.__update_author_etal(
            biblio_author, biblio_rest, bibcode)
        biblio_author = html_to_laTex(biblio_author)
        biblio_rest = html_to_laTex(biblio_rest)

        format_style = {
            'mnras':
            u'\\bibitem[\\protect\\citeauthoryear{{{}}}{{{}}}]{{{}}} {}{}',
            'icarus': u'\\bibitem[{}({})]{{{}}} {}{}',
            'soph': u'\\bibitem[{}({})]{{{}}}{}{}',
            'aastex': u'\\bibitem[{}({})]{{{}}} {}{}',
            'aspc': u'\\bibitem[{}({})]{{{}}} {}{}',
            'aasj': u'\\bibitem[{}({})]{{{}}} {}{}',
            'apsj': u'{}{}{}{}{}'
        }
        return format_style[self.csl_style].format(cita_author, cita_year,
                                                   bibcode, biblio_author,
                                                   biblio_rest)

    def get(self, export_organizer=adsOrganizer.plain):
        """
        Return the formatted records in the requested organization.

        :param export_organizer: output format, default is plain
        :return: for adsOrganizer.plain a dict with ``msg`` and ``export``;
            for adsOrganizer.citation_bibliography one string holding bibcode,
            citation and bibliography entry per record;
            for adsOrganizer.bibliography a list of bibliography entries;
            None for any other value
        """
        if export_organizer == adsOrganizer.plain:
            results = []
            num_docs = 0
            if (self.export_format == adsFormatter.unicode
                    or self.export_format == adsFormatter.latex):
                num_docs = len(self.bibcode_list)
                rows = zip(self.citation_item,
                           self.bibliography.bibliography(),
                           self.bibcode_list)
                for index, (cita, item, bibcode) in enumerate(rows, start=1):
                    rendered_cita = str(self.bibliography.cite(
                        cita, self._ignore_missing_reference))
                    results.append(
                        self.__format_output(rendered_cita, str(item),
                                             bibcode, index) + '\n')
            return {
                'msg': 'Retrieved {} abstracts, starting with number 1.'.format(
                    num_docs),
                'export': ''.join(results),
            }
        if export_organizer == adsOrganizer.citation_bibliography:
            results = []
            for cita, item, bibcode in zip(self.citation_item,
                                           self.bibliography.bibliography(),
                                           self.bibcode_list):
                rendered_cita = str(self.bibliography.cite(
                    cita, self._ignore_missing_reference))
                results.append(bibcode + '\n' + rendered_cita + '\n' +
                               str(item) + '\n')
            return ''.join(results)
        if export_organizer == adsOrganizer.bibliography:
            return [html_to_laTex(str(item))
                    for item in self.bibliography.bibliography()]
        return None
Пример #22
0
class ProcessorTest(object):
    """Load a JSON test fixture and run it through citeproc-py.

    The fixture's ``csl`` entry is used as the style, ``input`` as the
    reference data and ``result`` (split into lines) as the expected output.
    """

    def __init__(self, filename):
        """Read the fixture at *filename* and prepare style and bibliography."""
        with open(filename, encoding='UTF-8') as f:
            self.json_data = json.load(f)

        csl_io = io.BytesIO(utf_8_encode(self.json_data['csl'])[0])
        self.style = CitationStylesStyle(csl_io)
        self._fix_input(self.json_data['input'])
        self.references = [item['id'] for item in self.json_data['input']]
        self.references_dict = CiteProcJSON(self.json_data['input'])
        self.bibliography = CitationStylesBibliography(self.style,
                                                       self.references_dict)
        self.expected = self.json_data['result'].splitlines()

    @staticmethod
    def _fix_input(input_data):
        """Fill in missing ``id`` and ``type`` fields of the references.

        *input_data* is modified in place: a reference without ``id`` gets its
        list position as id, one without ``type`` becomes a ``'book'``.
        """
        for i, ref in enumerate(input_data):
            if 'id' not in ref:
                ref['id'] = i
            if 'type' not in ref:
                ref['type'] = 'book'

    def execute(self):
        """Register the fixture's citations and render them.

        The citations come from the first non-empty of ``citation_items``,
        ``citations`` and ``bibentries``; if all are empty, a single citation
        covering every reference is used.

        Returns a list: rendered citation strings in ``citation`` mode, a
        single element holding the rendered bibliography in the two
        bibliography modes, and an empty list for any other mode.
        """
        data = self.json_data
        if data['citation_items']:
            citations = [self.parse_citation(item)
                         for item in data['citation_items']]
        elif data['citations']:
            citations = []
            for entry in data['citations']:
                cit = entry[0]
                citation_items = [self.parse_citation_item(cititem)
                                  for cititem in cit['citationItems']]
                citation = Citation(citation_items)
                # not every fixture supplies a citationID
                if 'citationID' in cit:
                    citation.key = cit['citationID']
                citation.note_index = cit['properties']['noteIndex']
                citations.append(citation)
        elif data['bibentries']:
            citation_items = [self.parse_citation_item({'id': entry})
                              for entry in data['bibentries'][-1]]
            citations = [Citation(citation_items)]
        else:
            citation_items = [self.parse_citation_item({'id': ref})
                              for ref in self.references]
            citations = [Citation(citation_items)]

        for citation in citations:
            self.bibliography.register(citation)

        if self.style.has_bibliography():
            self.bibliography.sort()

        def do_nothing(citation_item):
            """Callback passed to cite(); ignores its argument."""
            return None

        results = []
        if data['mode'] == 'citation':
            if data['citations']:
                last = len(citations) - 1
                for i, citation in enumerate(citations):
                    # only the last citation is marked with '>>'
                    dots_or_other = '>>' if i == last else '..'
                    results.append('{}[{}] '.format(dots_or_other, i) +
                                   self.bibliography.cite(citation, do_nothing))
            else:
                for citation in citations:
                    results.append(self.bibliography.cite(citation, do_nothing))
        elif data['mode'] in ('bibliography', 'bibliography-nosort'):
            results.append(self.bibliography.bibliography())

        return results

    def parse_citation(self, citation_data):
        """Build a Citation from a list of citation-item dicts."""
        return Citation([self.parse_citation_item(item)
                         for item in citation_data])

    def parse_citation_item(self, citation_item_data):
        """Build a CitationItem from one citation-item dict.

        ``id`` becomes the reference key (as a string), ``locator`` is wrapped
        in a Locator using ``label`` (``'page'`` when no label is given), and
        every other key is passed on as a keyword option with ``-`` replaced
        by ``_``.

        Raises ValueError if the dict has no ``id``.
        """
        reference_key = None
        options = {}
        for key, value in citation_item_data.items():
            python_key = key.replace('-', '_')
            if python_key == 'id':
                reference_key = str(value)
            elif python_key == 'locator':
                try:
                    options['locator'] = Locator(citation_item_data['label'],
                                                 value)
                except KeyError:
                    # some tests don't specify the label
                    options['locator'] = Locator('page', value)
            elif python_key == 'label':
                # consumed together with 'locator' above
                pass
            else:
                options[python_key] = value

        if reference_key is None:
            raise ValueError(
                "citation item has no 'id': {!r}".format(citation_item_data))
        return CitationItem(reference_key, **options)
Пример #23
0
class ProcessorTest(object):
    """Parses a test fixture and provides a method for processing the tests
    defined in it."""
    # Markup wrapped around the bibliography and around each of its entries
    # in the results produced by execute().
    bib_prefix = '<div class="csl-bib-body">'
    bib_suffix = '</div>'
    item_prefix = '  <div class="csl-entry">'
    item_suffix = '</div>'

    def __init__(self, filename):
        """Parse the fixture at *filename* and prepare style and bibliography."""
        self.csl_test = CslTest({}, None, (filename, ),
                                os.path.basename(filename))
        self.csl_test.parse()
        csl_io = io.BytesIO(utf_8_encode(self.data['csl'])[0])
        self.style = CitationStylesStyle(csl_io)
        self._fix_input(self.data['input'])
        self.references = [item['id'] for item in self.data['input']]
        self.references_dict = CiteProcJSON(self.data['input'])
        self.bibliography = CitationStylesBibliography(self.style,
                                                       self.references_dict)
        self.expected = self.data['result'].splitlines()

    @property
    def data(self):
        """The parsed fixture data held by the underlying CslTest."""
        return self.csl_test.data

    @staticmethod
    def _fix_input(input_data):
        """Fill in missing ``id`` and ``type`` fields of the references.

        *input_data* is modified in place: a reference without ``id`` gets its
        list position as id, one without ``type`` becomes a ``'book'``.
        """
        for i, ref in enumerate(input_data):
            if 'id' not in ref:
                ref['id'] = i
            if 'type' not in ref:
                ref['type'] = 'book'

    def execute(self):
        """Register the fixture's citations and render them.

        The citations come from the first non-empty of ``citation_items``,
        ``citations`` and ``bibentries``; if all are empty, a single citation
        covering every reference is used.

        Returns a list of strings: rendered citations in ``citation`` mode,
        the wrapped bibliography entries in the two bibliography modes, and an
        empty list for any other mode.
        """
        data = self.data
        if data['citation_items']:
            citations = [
                self.parse_citation(item)
                for item in data['citation_items']
            ]
        elif data['citations']:
            citations = []
            for entry in data['citations']:
                cit = entry[0]
                citation_items = [
                    self.parse_citation_item(cititem)
                    for cititem in cit['citationItems']
                ]
                citation = Citation(citation_items)
                # not every fixture supplies a citationID
                if 'citationID' in cit:
                    citation.key = cit['citationID']
                citation.note_index = cit['properties']['noteIndex']
                citations.append(citation)
        elif data['bibentries']:
            citation_items = [
                self.parse_citation_item({'id': entry})
                for entry in data['bibentries'][-1]
            ]
            citations = [Citation(citation_items)]
        else:
            citation_items = [
                self.parse_citation_item({'id': ref})
                for ref in self.references
            ]
            citations = [Citation(citation_items)]

        for citation in citations:
            self.bibliography.register(citation)

        if self.style.has_bibliography():
            self.bibliography.sort()

        def do_nothing(citation_item):
            """Callback passed to cite(); ignores its argument."""
            return None

        results = []
        if data['mode'] == 'citation':
            if data['citations']:
                last = len(citations) - 1
                for i, citation in enumerate(citations):
                    # only the last citation is marked with '>>'
                    dots_or_other = '>>' if i == last else '..'
                    results.append(
                        '{}[{}] '.format(dots_or_other, i) +
                        self.bibliography.cite(citation, do_nothing))
            else:
                for citation in citations:
                    results.append(self.bibliography.cite(
                        citation, do_nothing))
        elif data['mode'] in ('bibliography', 'bibliography-nosort'):
            results.append(self.bib_prefix)
            for entry in self.bibliography.bibliography():
                results.append(self.item_prefix + str(entry) +
                               self.item_suffix)
            results.append(self.bib_suffix)
        return results

    def parse_citation(self, citation_data):
        """Build a Citation from a list of citation-item dicts."""
        return Citation([self.parse_citation_item(item)
                         for item in citation_data])

    def parse_citation_item(self, citation_item_data):
        """Build a CitationItem from one citation-item dict.

        ``id`` becomes the reference key (as a string), ``locator`` is wrapped
        in a Locator using ``label`` (``'page'`` when no label is given), and
        every other key is passed on as a keyword option with ``-`` replaced
        by ``_``.

        Raises ValueError if the dict has no ``id``.
        """
        reference_key = None
        options = {}
        for key, value in citation_item_data.items():
            python_key = key.replace('-', '_')
            if python_key == 'id':
                reference_key = str(value)
            elif python_key == 'locator':
                try:
                    options['locator'] = Locator(citation_item_data['label'],
                                                 value)
                except KeyError:
                    # some tests don't specify the label
                    options['locator'] = Locator('page', value)
            elif python_key == 'label':
                # consumed together with 'locator' above
                pass
            else:
                options[python_key] = value

        if reference_key is None:
            raise ValueError(
                "citation item has no 'id': {!r}".format(citation_item_data))
        return CitationItem(reference_key, **options)
Пример #24
0
def bib(style_path,
        ds,
        return_cites_and_keys=False,
        formatter="chocolate",
        apa_tweaks=True,
        # The below options are ignored unless apa_tweaks is on.
        always_include_issue=False,
        include_isbn=False,
        url_after_doi=False,
        publisher_website=True,
        abbreviate_given_names=True):
    """Render the CSL-JSON records in ``ds`` as a list of bibliography strings.

    ``ds`` is deep-copied first, so the caller's records are not modified.
    Records without an ``id`` get a random one, and keys whose value is
    ``None`` are dropped before rendering.

    With ``apa_tweaks`` on, every record needs a ``type``, an ``author`` or
    ``editor`` list, and an ``issued`` date (they are indexed directly).

    Args:
        style_path: passed to ``get_style`` to obtain the CSL style.
        ds: iterable of CSL-JSON style dicts.
        return_cites_and_keys: if true, also return the rendered citations
            and the bibliography keys.
        formatter: a formatter object, or a name looked up in
            ``formatter_from_name``.
        apa_tweaks: enable the APA-specific adjustments below.
        always_include_issue: keep ``issue`` on ``article-journal`` records.
        include_isbn, url_after_doi: passed to ``get_style``.
        publisher_website: for reports with both ``publisher`` and ``URL``,
            fold the publisher into the URL text.
        abbreviate_given_names: passed to ``get_style``; also removes hyphens
            that precede lowercase letters in authors' given names.

    Returns:
        The list of formatted entries, or the tuple
        ``(formatted citations, bibliography keys, formatted entries)`` when
        ``return_cites_and_keys`` is true.

    Raises:
        ValueError: if ``formatter`` is a string that is not a known name.
    """
    if isinstance(formatter, str):
        try:
            formatter = formatter_from_name[formatter]
        except KeyError:
            raise ValueError('Unknown formatter "{}"'.format(formatter))

    style = get_style(style_path, apa_tweaks,
        include_isbn, url_after_doi, abbreviate_given_names)

    ds = deepcopy(ds)
    if apa_tweaks:
        # Distinguish entries that would have identical authors and years
        # by adding suffixes to the years.
        #
        # Group works by author and year.
        #
        # (Actually, we use only an initial subset of authors,
        # the same number that would be included in an inline citation
        # after the first inline citation. This is 2 for 2 authors
        # and 1 otherwise.)
        ay = defaultdict(list)
        for d in ds:
            names = d.get('author') or d.get('editor')
            if len(names) != 2:
                names = [names[0]]
            k = repr(names) + '/' + str(d['issued']['date-parts'][0][0])
            # The same object may appear in ds more than once; group it once.
            if not any(d is v for v in ay[k]):
                ay[k].append(d)
        # If any group has more than one element, add suffixes.
        for v in ay.values():
            if len(v) > 1:
                for i, d in enumerate(sorted(v, key=title_sort_key)):
                    d['year_suffix'] = ascii_lowercase[i]

    for d in ds:
        if 'id' not in d:
            d['id'] = str(random())
        for k in list(d.keys()):
            if d[k] is None:
                del d[k]
        if apa_tweaks:
            entry_type = d['type']
            # By default, don't include the issue number for
            # journal articles.
            if not always_include_issue and entry_type == 'article-journal':
                delf(d, 'issue')
            # Use the weird "Retrieved from Dewey, Cheatem, &
            # Howe website: http://example.com" format prescribed
            # for reports.
            if (publisher_website and entry_type == 'report'
                    and 'publisher' in d and 'URL' in d):
                d['URL'] = '{} website: {}'.format(
                    d.pop('publisher'), d['URL'])
            if entry_type == 'speech':
                # 'genre' is optional (and removed above when it was None),
                # so look it up without indexing.
                genre = d.get('genre')
                if genre == 'paper':
                    # Add structure words for presentations and include
                    # the event place.
                    d['event'] = 'meeting of the {}, {}'.format(
                        d.pop('publisher'), d['event-place'])
                elif genre == 'video':
                    d['medium'] = 'Video file'
                    del d['genre']
            # Format encyclopedia entries like book chapters.
            if entry_type == 'entry-encyclopedia':
                d['type'] = 'chapter'
            # When abbreviating given names, remove hyphens
            # preceding lowercase letters. Otherwise, weird
            # stuff happens.
            if abbreviate_given_names and 'author' in d:
                for a in d['author']:
                    if 'given' in a:
                        a['given'] = sub(
                            '-(.)',
                            lambda mo:
                                ("" if mo.group(1).islower() else "-") +
                                mo.group(1),
                            a['given'])

    bibliography = CitationStylesBibliography(
        style,
        CiteProcJSON(ds),
        formatter)
    cites = [Citation([CitationItem(d['id'])]) for d in ds]
    for c in cites:
        bibliography.register(c)

    def sort_key_f(item):
        # Order by (family name, first initial) of each author or editor,
        # then year, then title, then first page.
        ref = item.reference
        names = [(name['family'].lower(), name['given'][0].lower() if 'given' in name else '')
            for name in ref.get('author') or ref.get('editor')]
        return (names, ref['issued']['year'],
            title_sort_key(ref),
            ref['page']['first'] if 'page' in ref else '')

    if len(ds) > 1:
        # Sort the bibliography
        # bibliography.sort()   # Doesn't appear to handle leading "the"s correctly.
        bibliography.items = sorted(bibliography.items, key=sort_key_f)
        bibliography.keys = [item.key for item in bibliography.items]
    bibl = bibliography.bibliography()

    for i, s in enumerate(bibl):
        s = ''.join(s)
        # Fix spacing and punctuation issues.
        s = s.replace('  ', ' ')
        s = sub(r'([.!?…])\.', r'\1', s)
        if apa_tweaks:
            if formatter is citeproc.formatter.html or formatter is chocolate:
                # Italicize the stuff between a journal name and a volume
                # number.
                s = sub(r'</i>, <i>(\d)', r', \1', s)
                # Remove redundant periods that are separated
                # from the first end-of-sentence mark by an </i>
                # tag.
                s = sub(r'([.!?…]</i>)\.', r'\1', s)
            # If there are two authors and the first is a mononym,
            # remove the comma after it.
            s = sub('^([^.,]+), &', r'\1 &', s)
        bibl[i] = s

    if return_cites_and_keys:
        fcites = [bibliography.cite(c, lambda x: None) for c in cites]
        return (fcites, bibliography.keys, bibl)
    return bibl