def test_eprint(self):
    """
    Exercise get_eprint against three documents: the first doc of the solr
    fixture (for which an empty string is expected), a minimal arXiv doc and
    a minimal ASCL doc (for which the value of `eid` is expected back).
    """
    docs = solrdata.data['response'].get('docs')
    a_doc_no_eprint = docs[0]
    assert get_eprint(a_doc_no_eprint) == ''

    # (document, expected eprint id) pairs, checked in order
    cases = (
        (
            {
                "bibcode": "2018arXiv180303598K",
                "eid": "arXiv:1803.03598"
            },
            'arXiv:1803.03598',
        ),
        (
            {
                "bibcode": "2013ascl.soft08009C",
                "eid": "ascl:1308.009"
            },
            'ascl:1308.009',
        ),
    )
    for a_doc, expected in cases:
        assert get_eprint(a_doc) == expected
def __get_doc(self, index):
    """
    Fill in the custom format template for a single document.

    Starting from `self.custom_format`, every entry of `self.parsed_spec` whose
    third element names a known field is substituted through `__add_in`;
    entries naming anything else are skipped. `self.line_feed` is appended and
    the result is handed to `__format_line_wrapped`.

    :param index: index to the docs structure returned from solr
    :return: the value returned by `__format_line_wrapped` for this document
    """
    result = self.custom_format
    a_doc = self.from_solr['response'].get('docs')[index]

    for field in self.parsed_spec:
        name = field[2]

        if name in ('title', 'doi', 'comment'):
            # joined into one string before insertion
            value = ''.join(a_doc.get(name, ''))
        elif name == 'author':
            value = self.__get_author_list(field[1], index)
        elif name in ('doctype', 'abstract', 'copyright', 'bibcode', 'volume', 'year'):
            # inserted exactly as stored in the document
            value = a_doc.get(name, '')
        elif name == 'pubdate':
            value = self.__format_date(a_doc.get(name, ''), field[1][-1])
        elif name == 'aff':
            value = self.__get_affiliation_list(a_doc)
        elif name == 'keyword':
            value = self.__get_keywords(a_doc)
        elif name == 'url':
            value = self.__format_url(a_doc.get('bibcode', ''), field[1][-1])
        elif name in ('pub', 'pub_raw'):
            value = self.__get_publication(field[1], a_doc)
        elif name == 'citation_count':
            value = str(a_doc.get(name, ''))
        elif name == 'eid,identifier':
            value = get_eprint(a_doc)
        elif name in ('page,page_range', 'lastpage,page_range', 'page_range,page'):
            value = self.__get_page(name, a_doc)
        else:
            # unknown specifier: leave the template untouched for this entry
            continue

        result = self.__add_in(result, field, value)

    result += self.line_feed
    return self.__format_line_wrapped(result, index)
def __get_doc_reference_xml(self, index, parent, includeAbs):
    """
    Append one "record" element to `parent` for the document at `index` and
    populate it for the Reference XML format.

    The fields to emit come from `__get_fields(self.EXPORT_FORMAT_REF_XML)`;
    each known field is written into the record by the matching helper, and
    fields that are not recognised are ignored.

    :param index: position of the document in the solr response docs
    :param parent: element under which the new "record" sub-element is created
    :param includeAbs: when falsy, the 'abstract' field is not written
    :return: nothing; the record is attached to `parent`
    """
    a_doc = self.from_solr['response'].get('docs')[index]
    fields = self.__get_fields(self.EXPORT_FORMAT_REF_XML)
    record = ET.SubElement(parent, "record")

    for field in fields:
        tag = fields[field]

        if field in ('bibcode', 'pub', 'volume', 'copyright'):
            self.__add_in(record, tag, a_doc.get(field, ''))
        elif field in ('title', 'page', 'doi'):
            # joined into a single string before being written
            self.__add_in(record, tag, ''.join(a_doc.get(field, '')))
        elif field == 'author':
            self.__add_author_list(a_doc, record, tag)
        elif field == 'aff':
            self.__add_affiliation_list(a_doc, record, tag)
        elif field == 'date':
            formatted_date = self.__format_date(a_doc.get(field, ''), self.EXPORT_FORMAT_REF_XML)
            self.__add_in(record, tag, formatted_date)
        elif field == 'pub_raw':
            self.__add_pub_raw(a_doc, record, tag, self.EXPORT_FORMAT_REF_XML)
        elif field == 'keyword':
            self.__add_keywords(a_doc, record, self.EXPORT_FORMAT_REF_XML)
        elif field == 'url':
            # record url is the configured base url followed by the bibcode
            base_url = current_app.config.get('EXPORT_SERVICE_FROM_BBB_URL')
            self.__add_in(record, tag, base_url + '/' + a_doc.get('bibcode', ''))
        elif field == 'citation_count':
            count = int(a_doc.get(field, 0))
            self.__add_in(record, tag, self.__get_citation(count, self.EXPORT_FORMAT_REF_XML))
        elif field == 'abstract' and includeAbs:
            self.__add_in(record, tag, self.__format_line_wrapped(a_doc.get(field, '')))
        elif field == 'link':
            self.__add_doc_links(a_doc, record)
        elif field == 'eprintid':
            self.__add_in(record, tag, get_eprint(a_doc))
def __get_doc(self, index, include_abs, maxauthor, authorcutoff, journalformat):
    """
    Build the text of one entry for the document at `index`.

    The entry opens with the document type, '{' and the key, then carries one
    formatted piece per field returned by `__get_fields`, and is closed with
    '}' followed by a blank line. Fields that are not recognised contribute
    nothing.

    :param index: position of the document in the solr response docs
    :param include_abs: when falsy, the 'abstract' field is left out
    :param maxauthor: passed through to the author and affiliation list helpers
    :param authorcutoff: passed through to the author and affiliation list helpers
    :param journalformat: passed through to `__get_journal` for the 'pub' field
    :return: the complete entry as text
    """
    # layouts for a single `name = value` piece; they differ only in how the value is delimited
    fmt_braced_quoted = u'{0:>13} = "{{{1}}}"'
    fmt_braced = u'{0:>13} = {{{1}}}'
    fmt_quoted = u'{0:>13} = "{1}"'  # not referenced anywhere in this method
    fmt_plain = u'{0:>13} = {1}'

    a_doc = self.from_solr['response'].get('docs')[index]

    entry_type = self.__get_doc_type(a_doc.get('doctype', ''))
    text = entry_type + '{' + self.__get_key(index) + ',\n'

    fields = self.__get_fields(a_doc)
    for field in fields:
        label = fields[field]

        if field in ('author', 'editor'):
            names = self.__get_author_list(a_doc, field, maxauthor, authorcutoff)
            piece = self.__add_in_wrapped(label, names, fmt_braced)
        elif field == 'title':
            piece = self.__add_in(label, encode_laTex(''.join(a_doc.get(field, ''))), fmt_braced_quoted)
        elif field == 'aff':
            affiliations = self.__get_affiliation_list(a_doc, maxauthor, authorcutoff)
            piece = self.__add_in_wrapped(label, affiliations, fmt_braced)
        elif field == 'pub_raw':
            piece = self.__add_in(label, self.__add_clean_pub_raw(a_doc), fmt_braced)
        elif field == 'pub':
            piece = self.__add_in(label, self.__get_journal(a_doc, journalformat), fmt_braced)
        elif field in ('doi', 'series', 'version', 'publisher'):
            # joined into a single string before formatting
            piece = self.__add_in(label, ''.join(a_doc.get(field, '')), fmt_braced)
        elif field == 'keyword':
            piece = self.__add_in(label, self.__add_keywords(a_doc), fmt_braced)
        elif field == 'year':
            # a missing or empty value is handed over as None
            piece = self.__add_in(label, a_doc.get(field, '') or None, fmt_plain)
        elif field in ('volume', 'issue'):
            # a missing or empty value is handed over as None
            piece = self.__add_in(label, a_doc.get(field, '') or None, fmt_braced)
        elif field == 'month':
            # month is derived from the document's pubdate
            piece = self.__add_in(label, self.__format_date(a_doc.get('pubdate', '')), fmt_plain)
        elif field == 'abstract' and include_abs:
            piece = self.__add_in_wrapped(label, encode_laTex(a_doc.get(field, '')), fmt_braced_quoted)
        elif field == 'eid':
            piece = self.__add_in(label, a_doc.get(field, ''), fmt_braced)
        elif field == 'page_range':
            piece = self.__add_in(label, self.__add_page(a_doc), fmt_braced)
        elif field == 'bibcode':
            # emitted as the configured base url followed by the bibcode
            link = current_app.config['EXPORT_SERVICE_FROM_BBB_URL'] + '/' + a_doc.get(field, '')
            piece = self.__add_in(label, link, fmt_braced)
        elif field == 'adsnotes':
            piece = self.__add_in(label, current_app.config['EXPORT_SERVICE_ADS_NOTES'], fmt_braced)
        elif field == 'eprintid':
            piece = self.__add_in_eprint(label, get_eprint(a_doc), fmt_braced)
        elif field == 'arxiv_class':
            piece = self.__add_in_arxiv_class(label, a_doc.get(field, ''), fmt_braced)
        else:
            # nothing to emit for this field
            continue

        text += piece

    # drop the comma that trails the last piece, keeping the line break
    text = text[:-len(',\n')] + '\n'
    return text + '}\n\n'
def __get_doc(self, index, fields, export_format):
    """
    Build the output text for the document at `index`, one piece per entry
    of `fields`, followed by a blank line.

    :param index: position of the document in the solr response docs
    :param fields: mapping from document field name to the tag handed to the
        formatting helpers; iterated in its own order
    :param export_format: passed through to the helpers whose output depends
        on the export format
    :return: the text for this document
    """
    a_doc = self.from_solr['response'].get('docs')[index]
    result = ''

    for field in fields:
        tag = fields[field]

        if field in ('title', 'page', 'doi', 'isbn', 'pubnote', 'issn', 'pub'):
            # joined into a single string before formatting
            piece = self.__add_in(tag, ''.join(a_doc.get(field, '')))
        elif field == 'lastpage':
            # NOTE(review): this helper is referenced with three leading
            # underscores - confirm that matches its definition
            piece = self.__add_in(tag, self.___get_last_page(a_doc.get('page_range', '')))
        elif field == 'author':
            piece = self.__add_author_list(a_doc, export_format, tag)
        elif field == 'doctype':
            piece = self.__add_in(tag, self.__get_doc_type(a_doc.get(field, ''), export_format))
        elif field == 'pubdate':
            piece = self.__add_in(tag, self.__format_date(a_doc.get(field, ''), export_format))
        elif field == 'abstract':
            # 9/18/2020 as per request of a user, no line wrapping abstract
            piece = self.__add_in(tag, self.__add_abstract(a_doc))
        elif field == 'aff':
            piece = self.__get_affiliation_list(a_doc, export_format, tag)
        elif field == 'keyword':
            piece = self.__add_keywords(a_doc, export_format, tag)
        elif field == 'comment':
            piece = self.__add_in(tag, self.__format_line_wrapped(self.__add_comment(a_doc)))
        elif field == 'url':
            # emitted as the configured base url followed by the bibcode
            link = current_app.config['EXPORT_SERVICE_FROM_BBB_URL'] + '/' + a_doc.get('bibcode', '')
            piece = self.__add_in(tag, link)
        elif field == 'endRecord':
            # the tag itself is written out, on its own line
            piece = tag + '\n'
        elif field == 'pub_raw':
            piece = self.__add_in(tag, self.__add_clean_pub_raw(a_doc))
        elif field == 'links':
            piece = self.__add_doc_links(a_doc, tag)
        elif field == 'eprintid':
            piece = self.__add_in(tag, get_eprint(a_doc))
        elif field == 'bibstem':
            # only the first element of the bibstem value is used
            piece = self.__add_in(tag, a_doc.get(field, ['', ''])[0])
        else:
            # any other field is written as stored in the document
            piece = self.__add_in(tag, a_doc.get(field, ''))

        result += piece

    # line feed once the doc is complete
    return result + '\n\n'