Пример #1
0
    def __get_journal(self, a_doc, journalformat):
        """
        Return the journal in the representation requested by the client.

        For the macro/default format the configured AASTeX macro for the
        record's bibstem is used, falling back to the LaTeX-encoded ``pub``
        when no macro is configured. The abbreviated format returns the
        publication abbreviation and the full format the LaTeX-encoded ``pub``.

        :param a_doc: record; 'doctype', 'bibstem' and 'pub' are read from it
        :param journalformat: one of the adsJournalFormat values
        :return: the journal string, or None for software records and for
            any format that is not handled here
        """
        # journal is not applicable to software records
        if a_doc.get('doctype', '') == 'software':
            return None

        wants_macro = (journalformat == adsJournalFormat.macro
                       or journalformat == adsJournalFormat.default)
        if wants_macro:
            macro_by_bibstem = {
                key: macro for key, macro in
                current_app.config['EXPORT_SERVICE_AASTEX_JOURNAL_MACRO']
            }
            bibstem = self.get_bibstem(a_doc.get('bibstem', ''))
            full_name = encode_laTex(''.join(a_doc.get('pub', '')))
            return macro_by_bibstem.get(bibstem, full_name)

        if journalformat == adsJournalFormat.abbreviated:
            return Format(None).get_pub_abbrev(a_doc.get('bibstem', ''))

        if journalformat == adsJournalFormat.full:
            return encode_laTex(''.join(a_doc.get('pub', '')))

        return None
Пример #2
0
    def __update_title(self):
        """
        Rewrite 'container-title' and LaTeX-encode 'title' for every entry
        of ``self.for_cls``, according to ``self.csl_style``.

        - mnras: container-title becomes the publication abbreviation
        - aastex / aasj / aspc: container-title follows ``self.journal_format``
          (configured macro, abbreviation, or LaTeX-encoded full name)
        - soph: container-title is taken from the configured abbreviation
          table, falling back to the LaTeX-encoded container-title
        - icarus / apsj: container-title is only LaTeX-encoded

        Entries are left untouched for any other style, and for an
        unhandled journal format in the aastex family.

        :return:
        """
        def abbreviated(entry):
            return Format(None).get_pub_abbrev(entry['bibstem'])

        def encoded(entry):
            return encode_laTex(entry['container-title'])

        def looked_up(table):
            # look the bibstem up in table, defaulting to the encoded name
            def resolve(entry):
                return table.get(
                    Format(None).get_bibstem(entry['bibstem']),
                    encoded(entry))
            return resolve

        style = self.csl_style
        if style == 'mnras':
            container_title = abbreviated
        elif style in ('aastex', 'aasj', 'aspc'):
            journal_format = self.journal_format
            if (journal_format == adsJournalFormat.macro
                    or journal_format == adsJournalFormat.default):
                container_title = looked_up({
                    key: macro for key, macro in
                    current_app.config['EXPORT_SERVICE_AASTEX_JOURNAL_MACRO']
                })
            elif journal_format == adsJournalFormat.abbreviated:
                container_title = abbreviated
            elif journal_format == adsJournalFormat.full:
                container_title = encoded
            else:
                return
        elif style == 'soph':
            container_title = looked_up(current_app.config[
                'EXPORT_SERVICE_SOPH_JOURNAL_ABBREVIATION'])
        elif style in ('icarus', 'apsj'):
            container_title = encoded
        else:
            return

        for entry in self.for_cls:
            entry['container-title'] = container_title(entry)
            entry['title'] = encode_laTex(entry['title'])
Пример #3
0
    def __get_affiliation_list(self, a_doc, maxauthor, authorcutoff):
        """
        Format the affiliations of a record as one comma-separated string.

        Each affiliation is prefixed with a counter label, e.g.
        ``<label>(<affiliation>)``, except for PhD and masters theses where
        the affiliation is listed as is.

        The list is shortened to ``maxauthor`` entries only when there are
        more than ``authorcutoff`` affiliations and ``maxauthor`` is non-zero;
        a ``maxauthor`` of zero means all affiliations are returned.

        :param a_doc: record; 'aff' and 'doctype' are read from it
        :param maxauthor: number of affiliations to keep when cutting, 0 for all
        :param authorcutoff: cutting applies only above this many affiliations
        :return: LaTeX-encoded affiliation string, '' when the record has no 'aff'
        """
        if 'aff' not in a_doc:
            return ''

        affiliations = a_doc['aff']
        # NOTE(review): when maxauthor is non-zero but the list is not cut
        # (len(aff) <= authorcutoff), more than maxauthor labels are indexed
        # below -- confirm generate_counter_id returns enough entries.
        counter = self.generate_counter_id(
            maxauthor if maxauthor != 0 else len(affiliations))

        cut_affiliations = (len(affiliations) > authorcutoff
                            and maxauthor != 0)
        if cut_affiliations and maxauthor > 0:
            affiliations = affiliations[:maxauthor]

        if a_doc.get('doctype', '') in ('phdthesis', 'mastersthesis'):
            entries = list(affiliations)
        else:
            entries = [
                counter[position] + '(' + affiliation + ')'
                for position, affiliation in enumerate(affiliations)
            ]

        # joining avoids the trailing separator altogether, including the
        # case of a single empty affiliation which used to leave ', ' behind
        return encode_laTex(', '.join(entries))
Пример #4
0
    def __add_keywords(self, a_doc):
        """
        Join the keywords of a record with ', ' and LaTeX-encode the result.

        :param a_doc: record; 'keyword' is read from it
        :return: encoded keyword string, '' when the record has no 'keyword'
        """
        if 'keyword' in a_doc:
            joined = ', '.join(a_doc['keyword'])
            return encode_laTex(joined)
        return ''
Пример #5
0
 def __add_abstract(self, a_doc):
     """
     Return the abstract with its break tags replaced and LaTeX-encoded.

     :param a_doc: record; 'abstract' is read from it, defaulting to ''
     :return: result of encode_laTex on the converted abstract
     """
     text = a_doc.get('abstract', '')
     # '<P />' becomes two backslashes, '<BR />' a single backslash
     text = text.replace('<P />', '\\\\')
     text = text.replace('<BR />', '\\')
     return encode_laTex(text)
Пример #6
0
 def __add_clean_pub_raw(self, a_doc):
     """
     Strip tags from pub_raw and LaTeX-encode what is left.

     Every pattern of ``self.REGEX_PUB_RAW`` is applied with its associated
     replacement, but only when the text contains both '<' and '>'.

     :param a_doc: record; 'pub_raw' is read from it, defaulting to ''
     :return: result of encode_laTex on the cleaned text
     """
     cleaned = ''.join(a_doc.get('pub_raw', ''))
     has_markup = '<' in cleaned and '>' in cleaned
     if has_markup:
         for pattern, replacement in self.REGEX_PUB_RAW.items():
             cleaned = pattern.sub(replacement, cleaned)
     return encode_laTex(cleaned)
Пример #7
0
    def __encode_latex(self, value, field):
        """
        LaTeX-encode a field value, with per-field exceptions.

        :param value: the text to encode
        :param field: name of the field the value belongs to
        :return: author-encoded text for 'author'; the value unchanged for
            'bibcode' and for a 'pub' that matches ``self.REGEX_PUB_MACRO``;
            otherwise the result of encode_laTex
        """
        if field == 'author':
            return encode_laTex_author(value)
        # bibcode is never encoded
        if field == 'bibcode':
            return value
        # a publication given as a macro is left as is
        if field == 'pub' and self.REGEX_PUB_MACRO.match(value):
            return value
        return encode_laTex(value)
Пример #8
0
    def __get_journal(self, a_doc, journalformat):
        """
        Return the journal in the representation requested by the client.

        The requested format is honoured only for doctypes outside the list
        below; for the listed doctypes the LaTeX-encoded full ``pub`` is
        always returned. Software records have no journal.

        :param a_doc: record; 'doctype', 'bibstem' and 'pub' are read from it
        :param journalformat: one of the adsJournalFormat values
        :return: the journal string, or None for software records and for
            any format that is not handled here
        """
        doctype = a_doc.get('doctype', '')
        if doctype == 'software':
            return None

        # for these doctypes pub is shown as booktitle / how_published,
        # where it should appear in full regardless of the user preference
        full_pub_doctypes = (
            'inbook', 'proceedings', 'inproceedings', 'abstract', 'misc',
            'proposal', 'pressrelease', 'talk',
        )
        if doctype in full_pub_doctypes:
            return encode_laTex(''.join(a_doc.get('pub', '')))

        wants_macro = (journalformat == adsJournalFormat.macro
                       or journalformat == adsJournalFormat.default)
        if wants_macro:
            macro_by_bibstem = {
                key: macro for key, macro in
                current_app.config['EXPORT_SERVICE_AASTEX_JOURNAL_MACRO']
            }
            bibstem = self.get_bibstem(a_doc.get('bibstem', ''))
            full_name = encode_laTex(''.join(a_doc.get('pub', '')))
            return macro_by_bibstem.get(bibstem, full_name)

        if journalformat == adsJournalFormat.abbreviated:
            return Format(None).get_pub_abbrev(a_doc.get('bibstem', ''))

        if journalformat == adsJournalFormat.full:
            return encode_laTex(''.join(a_doc.get('pub', '')))

        return None
Пример #9
0
    def __format_output(self, cita, biblio, bibcode, index):
        """
        Assemble one bibliography item from the citation and bibliography
        strings, using the template of ``self.csl_style``.

        :param cita: citation text
        :param biblio: bibliography text
        :param bibcode: bibcode of the record; blanked for the apsj style
        :param index: not used by this method
        :return: the formatted item; a KeyError is raised for a style that
            has no template
        """
        if self.csl_style != 'apsj':
            cita_author, cita_year = self.__tokenize_cita(cita)
            biblio_author, biblio_rest = self.__tokenize_biblio(biblio)
        else:
            # apsj keeps the biblio as laid out already: the citation is
            # used as the author part and the remainder is everything else
            cita_author = cita_year = ''
            biblio_author = cita
            biblio_rest = biblio.replace(cita, '')
            # every template slot is filled, so pass an empty bibcode
            bibcode = ''

        if self.export_format == adsFormatter.latex:
            cita_author = encode_laTex_author(cita_author)
            biblio_author = encode_laTex_author(biblio_author)
            biblio_rest = encode_laTex(biblio_rest)

        # adjustments to the output that are not done in the style itself
        cita_author = self.__update_author_etal_add_emph(cita_author)
        cita_author = html_to_laTex(cita_author)
        biblio_author = self.__update_author_etal(str(biblio_author), bibcode)
        biblio_author = html_to_laTex(biblio_author)
        biblio_rest = html_to_laTex(biblio_rest)

        author_year_item = u'\\bibitem[{}({})]{{{}}} {}{}'
        templates = {
            'mnras':
            u'\\bibitem[\\protect\\citeauthoryear{{{}}}{{{}}}]{{{}}} {}{}',
            'soph': u'\\bibitem[{}({})]{{{}}}{}{}',
            'apsj': u'{}{}{}{}{}',
        }
        for style in ('icarus', 'aastex', 'aspc', 'aasj'):
            templates[style] = author_year_item

        template = templates[self.csl_style]
        return template.format(cita_author, cita_year, bibcode,
                               biblio_author, biblio_rest)
Пример #10
0
    def __encode(self, text, name):
        """
        Encode a field value for the export format in ``self.export_format``.

        :param text: the text to encode
        :param name: name of the field the text belongs to
        :return: for unicode the text unchanged; for html the text with
            '&', '<' and '>' escaped; for latex the author-encoded text for
            'author', the text unchanged for 'pub', otherwise the result of
            encode_laTex; for any other format the text unchanged
        """
        if self.export_format == adsFormatter.unicode:
            return text
        if self.export_format == adsFormatter.html:
            # cgi.escape was removed in Python 3.8; html.escape with
            # quote=False escapes the same three characters (&, <, >)
            try:
                from html import escape
            except ImportError:
                # interpreters without the html module still have cgi.escape
                from cgi import escape
            return escape(text, quote=False)
        if self.export_format == adsFormatter.latex:
            if name == 'author':
                return encode_laTex_author(text)
            # do not encode publication since it could be the macro
            if name == 'pub':
                return text
            return encode_laTex(text)
        return text
Пример #11
0
    def __get_affiliation_list(self, a_doc):
        """
        Format the affiliations of a record as one comma-separated string.

        Each affiliation is prefixed with an uppercase letter label
        (AA, AB, ...), e.g. ``AA(<affiliation>)``, except for PhD and masters
        theses where the affiliation is listed as is.

        :param a_doc: record; 'aff' and 'doctype' are read from it
        :return: LaTeX-encoded affiliation string, '' when the record has no 'aff'
        """
        if 'aff' not in a_doc:
            return ''

        affiliations = a_doc['aff']
        if a_doc.get('doctype', '') in ('phdthesis', 'mastersthesis'):
            entries = list(affiliations)
        else:
            # two letters give 676 labels; a fixed table of that size raised
            # IndexError on longer lists, so widen the label when needed
            width = 2
            while len(ascii_uppercase) ** width < len(affiliations):
                width += 1
            labels = product(ascii_uppercase, repeat=width)
            entries = [
                ''.join(label) + '(' + affiliation + ')'
                for affiliation, label in zip(affiliations, labels)
            ]

        # joining avoids the trailing separator altogether, including the
        # case of a single empty affiliation which used to leave ', ' behind
        return encode_laTex(', '.join(entries))
Пример #12
0
    def __get_doc(self, index, include_abs, maxauthor, authorcutoff,
                  journalformat):
        """
        Build the entry text for one document of the Solr response.

        The entry starts with the document type and key, followed by one
        line per field returned by ``__get_fields``, each formatted by the
        helper that matches the field, and ends with a closing brace.

        :param index: position of the document in the Solr response
        :param include_abs: the abstract is added only when this is true
        :param maxauthor: passed on to the author and affiliation formatters
        :param authorcutoff: passed on to the author and affiliation formatters
        :param journalformat: passed on to the journal formatter
        :return: the entry text, ending with '}' and a blank line
        """
        braced_quoted = u'{0:>13} = "{{{1}}}"'
        braced = u'{0:>13} = {{{1}}}'
        bare = u'{0:>13} = {1}'

        record = self.from_solr['response'].get('docs')[index]
        doc_type = self.__get_doc_type(record.get('doctype', ''))
        pieces = [doc_type + '{' + self.__get_key(index) + ',\n']

        labels = self.__get_fields(record)
        for name in labels:
            label = labels[name]
            if name in ('author', 'editor'):
                piece = self.__add_in_wrapped(
                    label,
                    self.__get_author_list(record, name, maxauthor,
                                           authorcutoff),
                    braced)
            elif name == 'title':
                piece = self.__add_in(
                    label, encode_laTex(''.join(record.get(name, ''))),
                    braced_quoted)
            elif name == 'aff':
                piece = self.__add_in_wrapped(
                    label,
                    self.__get_affiliation_list(record, maxauthor,
                                                authorcutoff),
                    braced)
            elif name == 'pub_raw':
                piece = self.__add_in(
                    label, self.__add_clean_pub_raw(record), braced)
            elif name == 'pub':
                piece = self.__add_in(
                    label, self.__get_journal(record, journalformat), braced)
            elif name in ('doi', 'series', 'version', 'publisher'):
                piece = self.__add_in(
                    label, ''.join(record.get(name, '')), braced)
            elif name == 'keyword':
                piece = self.__add_in(
                    label, self.__add_keywords(record), braced)
            elif name == 'year':
                # an empty value is passed on as None
                piece = self.__add_in(
                    label, record.get(name, '') or None, bare)
            elif name in ('volume', 'issue'):
                piece = self.__add_in(
                    label, record.get(name, '') or None, braced)
            elif name == 'month':
                piece = self.__add_in(
                    label, self.__format_date(record.get('pubdate', '')),
                    bare)
            elif name == 'abstract' and include_abs:
                piece = self.__add_in_wrapped(
                    label, encode_laTex(record.get(name, '')), braced_quoted)
            elif name == 'eid':
                piece = self.__add_in(label, record.get(name, ''), braced)
            elif name == 'page_range':
                piece = self.__add_in(label, self.__add_page(record), braced)
            elif name == 'bibcode':
                piece = self.__add_in(
                    label,
                    current_app.config['EXPORT_SERVICE_FROM_BBB_URL'] + '/' +
                    record.get(name, ''),
                    braced)
            elif name == 'adsnotes':
                piece = self.__add_in(
                    label, current_app.config['EXPORT_SERVICE_ADS_NOTES'],
                    braced)
            elif name == 'eprintid':
                piece = self.__add_in_eprint(
                    label, get_eprint(record), braced)
            elif name == 'arxiv_class':
                piece = self.__add_in_arxiv_class(
                    label, record.get(name, ''), braced)
            else:
                # field has no formatter here, nothing is emitted for it
                continue
            pieces.append(piece)

        body = ''.join(pieces)
        # drop the comma after the last line before closing the entry
        body = body[:-len(',\n')] + '\n'
        return body + '}\n\n'