def get_authors_tags():
    """
    Build the tags for main author, coauthors, alternative authors and
    collaborations from the 'citation' config file.

    The section to read is the one named by the ``function`` option of
    ``[rank_method]``; every configured value is parsed and tagified.

    @return: dict keyed by 'first_author', 'additional_author',
             'alternative_author_name' and 'collaboration_name'
    """
    config = load_config_file('citation')
    section = config.get("rank_method", "function")
    option_names = (
        'first_author',
        'additional_author',
        'alternative_author_name',
        'collaboration_name',
    )
    # One lookup per option, in the declared order.
    return {
        name: tagify(parse_tag(config.get(section, name)))
        for name in option_names
    }
def control_field(self, tag, escape=0):
    """
    Look up a control field of the record and return its value.

    :param tag: the marc code of a field
    :param escape: escaping mode; 0 returns the raw value, any other
                   value is handed to escape_field() together with it
    @return: value of field tag in record ('' when there is no record)
    """
    if self.get_record() is None:
        # BibRecord could not parse the object: nothing to read from
        return ''

    parsed = parse_tag(tag)
    code, ind1, ind2, subcode = parsed[0], parsed[1], parsed[2], parsed[3]
    raw_value = record_get_field_value(self.get_record(),
                                       code, ind1, ind2, subcode)
    if escape != 0:
        return escape_field(raw_value, escape)
    return raw_value
def fields(self, tag, escape=0, repeatable_subfields_p=False):
    """
    Returns the list of values corresponding to "tag".

    The shape of the result depends on whether "tag" carries a subcode:

      - with a subcode (e.g. 999C5b): a flat list of the values of that
        subfield;
      - without a subcode (e.g. 999C5): a list with one dictionary per
        field instance, whose keys are the subcodes and whose values are
        the values of tag.subcode.

    Eg. for given MARC::

        999C5 $a value_1a $b value_1b
        999C5 $b value_2b
        999C5 $b value_3b $b value_3b_bis

        bfo.fields('999C5b')
          -> ['value_1b', 'value_2b', 'value_3b', 'value_3b_bis']
        bfo.fields('999C5')
          -> [{'a':'value_1a', 'b':'value_1b'}, {'b':'value_2b'},
              {'b':'value_3b'}]

    By default each subfield of an instance maps to a single string, that
    is, repeatable subfields are considered not allowed. This is why
    'value_3b_bis' does not show up for bfo.fields('999C5') above (which
    of value_3b or value_3b_bis is kept is not defined). It keeps the
    common case simple, as most subfields are not repeatable.

    Set 'repeatable_subfields_p' to True to get every value; each subcode
    then maps to a list::

        bfo.fields('999C5', repeatable_subfields_p=True)
          -> [{'a':['value_1a'], 'b':['value_1b']}, {'b':['value_2b']},
              {'b':['value_3b', 'value_3b_bis']}]

    NOTICE THAT THE RETURNED STRUCTURE IS DIFFERENT in that mode.
    'repeatable_subfields_p' is not consulted when "tag" has a subcode:
    bfo.fields('999C5b') returns the same flat list whatever its value.

    'escape' parameter allows to escape special characters of the fields.
    The value of escape can be:
          0. No escaping
          1. Escape all HTML characters
          2. Remove unsafe HTML tags (Eg. keep <br />)
          3. Mix of mode 1 and 2. If value of field starts with
             <!-- HTML -->, then use mode 2. Else use mode 1.
          4. Remove all HTML tags
          5. Same as 2, with more tags allowed (like <img>)
          6. Same as 3, with more tags allowed (like <img>)
          7. Mix of mode 0 and mode 1. If field_value starts with
             <!--HTML-->, then use mode 0. Else use mode 1.
          8. Same as mode 1, but also escape double-quotes
          9. Same as mode 4, but also escape double-quotes

    :param tag: the marc code of a field
    :param escape: escaping mode (see above); 0 means no escaping
    :param repeatable_subfields_p: if True, each subcode maps to the list
                                   of its values in the dictionaries
    @return: values of field tag in record ([] when there is no record)
    """
    if self.get_record() is None:
        # Case where BibRecord could not parse object
        return []

    parsed = parse_tag(tag)
    subcode = parsed[3]

    if subcode != "":
        # Subcode has been defined: plain list of values.
        found = record_get_field_values(self.get_record(),
                                        parsed[0], parsed[1], parsed[2],
                                        subcode)
        if escape == 0:
            return found
        return [escape_field(item, escape) for item in found]

    # Subcode is undefined: list of dicts, one per instance.
    # However it might be the case of a control field.
    instances = record_get_field_instances(self.get_record(),
                                           parsed[0], parsed[1], parsed[2])

    if not repeatable_subfields_p:
        if escape == 0:
            return [dict(instance[0]) for instance in instances]
        return [
            {sub[0]: escape_field(sub[1], escape) for sub in instance[0]}
            for instance in instances
        ]

    # Repeatable mode: gather every value of a subcode into a list.
    grouped_instances = []
    for instance in instances:
        grouped = {}
        for sub in instance[0]:
            bucket = grouped.setdefault(sub[0], [])
            if escape == 0:
                bucket.append(sub[1])
            else:
                bucket.append(escape_field(sub[1], escape))
        grouped_instances.append(grouped)
    return grouped_instances
def get_tags_config(config):
    """
    Collect the MARC tags the citation machinery needs from our config.

    Every option is read from the section named by the ``function``
    option of ``[rank_method]``. A missing option yields None for the
    corresponding key (or the standard pubinfo form for
    'publication_format').

    @param config: parsed configuration object
    @return: dict of tags keyed by their role
    """
    # Probably "citation" unless this file gets renamed
    section = config.get("rank_method", "function")
    write_message("config function %s" % section, verbose=9)

    def optional_tag(option):
        """Return the tagified option, or None when it is not configured."""
        try:
            raw = config.get(section, option)
        except ConfigParser.NoOptionError:
            return None
        return tagify(parse_tag(raw))

    tags = {}
    # 037a: contains (often) the "hep-ph/0501084" tag of THIS record
    tags['record_pri_number'] = optional_tag("primary_report_number")
    # 088a: additional short identifier for the record
    tags['record_add_number'] = optional_tag("additional_report_number")

    # The next five all live in the reference list and refer to other
    # records.
    # 999C5r. Looks like: hep-ph/0408002
    tags['refs_report_number'] = optional_tag("reference_via_report_number")
    # 999C5s. Looks like: Phys.Rev.,A21,78
    tags['refs_journal'] = optional_tag("reference_via_pubinfo")
    # 999C5a. Looks like: 10.1007/BF03170733
    tags['refs_doi'] = optional_tag("reference_via_doi")
    # 999C50. Looks like: 1205
    tags['refs_record_id'] = optional_tag("reference_via_record_id")
    # 999C5i. Looks like: 9781439520031
    tags['refs_isbn'] = optional_tag("reference_via_isbn")

    # Fields needed to construct the journals for this record.
    # All four options must be present, otherwise 'publication' is None.
    journal_options = (
        ('pages', "pubinfo_journal_page"),
        ('year', "pubinfo_journal_year"),
        ('journal', "pubinfo_journal_title"),
        ('volume', "pubinfo_journal_volume"),
    )
    try:
        raw_journal = [(part, config.get(section, option))
                       for part, option in journal_options]
    except ConfigParser.NoOptionError:
        tags['publication'] = None
    else:
        tags['publication'] = {part: tagify(parse_tag(raw))
                               for part, raw in raw_journal}

    # Fields needed to lookup the DOIs
    tags['doi'] = get_field_tags('doi')
    # Fields needed to lookup the ISBN
    tags['isbn'] = get_field_tags('isbn')

    # 999C5s. A standardized way of writing a reference in the reference list.
    # Like: Nucl. Phys. B 710 (2000) 371
    try:
        publication_format = config.get(section, "pubinfo_journal_format")
    except ConfigParser.NoOptionError:
        publication_format = CFG_JOURNAL_PUBINFO_STANDARD_FORM
    tags['publication_format'] = publication_format

    # Print values of tags for debugging
    write_message("tag values: %r" % [tags], verbose=9)

    return tags
def get_tags_config(config):
    """
    Read from the config file the tags needed by the citation code.

    Options are looked up in the section named by the ``function`` option
    of ``[rank_method]``. Options that are not configured produce None
    (the standard pubinfo form in the case of "publication_format").

    @param config: parsed configuration object
    @return: dict mapping a role name to its tag(s)
    """
    # Probably "citation" unless this file gets renamed
    function = config.get("rank_method", "function")
    write_message("config function %s" % function, verbose=9)

    # (key in the result, option in the config file), in lookup order
    single_tag_options = [
        # 037a: contains (often) the "hep-ph/0501084" tag of THIS record
        ("record_pri_number", "primary_report_number"),
        # 088a: additional short identifier for the record
        ("record_add_number", "additional_report_number"),
        # The entries below are in the reference list and refer to other
        # records.
        # 999C5r. Looks like: hep-ph/0408002
        ("refs_report_number", "reference_via_report_number"),
        # 999C5s. Looks like: Phys.Rev.,A21,78
        ("refs_journal", "reference_via_pubinfo"),
        # 999C5a. Looks like: 10.1007/BF03170733
        ("refs_doi", "reference_via_doi"),
        # 999C50. Looks like: 1205
        ("refs_record_id", "reference_via_record_id"),
        # 999C5i. Looks like: 9781439520031
        ("refs_isbn", "reference_via_isbn"),
    ]

    tags = {}
    for key, option in single_tag_options:
        try:
            configured = config.get(function, option)
        except ConfigParser.NoOptionError:
            tags[key] = None
        else:
            tags[key] = tagify(parse_tag(configured))

    # Fields needed to construct the journals for this record; a single
    # missing option disables the whole "publication" entry.
    publication = {}
    try:
        for part, option in (("pages", "pubinfo_journal_page"),
                             ("year", "pubinfo_journal_year"),
                             ("journal", "pubinfo_journal_title"),
                             ("volume", "pubinfo_journal_volume")):
            publication[part] = config.get(function, option)
    except ConfigParser.NoOptionError:
        tags["publication"] = None
    else:
        for part in ("pages", "year", "journal", "volume"):
            publication[part] = tagify(parse_tag(publication[part]))
        tags["publication"] = publication

    # Fields needed to lookup the DOIs
    tags["doi"] = get_field_tags("doi")
    # Fields needed to lookup the ISBN
    tags["isbn"] = get_field_tags("isbn")

    # 999C5s. A standardized way of writing a reference in the reference list.
    # Like: Nucl. Phys. B 710 (2000) 371
    try:
        tags["publication_format"] = config.get(function,
                                                "pubinfo_journal_format")
    except ConfigParser.NoOptionError:
        tags["publication_format"] = CFG_JOURNAL_PUBINFO_STANDARD_FORM

    # Print values of tags for debugging
    write_message("tag values: %r" % [tags], verbose=9)

    return tags
def format_element(bfo, tag, limit, instances_separator=" ",
                   subfields_separator=" ", extension="", output_pattern=""):
    """
    Prints the given field of a record.
    If tag is in range [001, 010], this element assumes
    that it accesses a control field. Else it considers it
    accesses a data field.

    <p>For eg. consider the following metadata:
    <pre>
 100__ $$aCalatroni, S$$uCERN
 245__ $$aStatus of the EP Simulations and Facilities for the SPL
 700__ $$aFerreira, L$$uCERN
 700__ $$aMacatrao, M$$uCERN
 700__ $$aSkala, A$$uCERN
 700__ $$aSosin, M$$uCERN
 700__ $$ade Waele, R$$uCERN
 700__ $$aWithofs, Y$$uKHLim, Diepenbeek
    </pre>
    The following calls to bfe_field would print:
    <pre>
    <BFE_FIELD tag="700" instances_separator="<br/>" subfields_separator=" - ">

    Ferreira, L - CERN
    Macatrao, M - CERN
    Skala, A - CERN
    Sosin, M - CERN
    de Waele, R - CERN
    Withofs, Y - KHLim, Diepenbeek
    </pre>
    </p>

    <p>For more advanced formatting, the <code>output_pattern</code>
    parameter can be used to output the subfields of each instance in
    the specified way. For eg. consider the following metadata:
    <pre>
 775__ $$b15. Aufl.$$c1995-1996$$nv.1$$pGrundlagen und Werkstoffe$$w317999
 775__ $$b12. Aufl.$$c1963$$w278898
 775__ $$b14. Aufl.$$c1983$$w107899
 775__ $$b13. Aufl.$$c1974$$w99635
    </pre>
    with the following <code>output_pattern</code>:
    <pre>
    <a href="/record/%(w)s">%(b)s (%(c)s) %(n)s %(p)s</a>
    </pre>
    would print:<br/>

    <a href="/record/317999">15. Aufl. (1995-1996) v.1 Grundlagen und Werkstoffe</a><br/>
    <a href="/record/278898">12. Aufl. (1963) </a><br/>
    <a href="/record/107899">14. Aufl. (1983) </a><br/>
    <a href="/record/99635">13. Aufl. (1974) </a>

    <br/>(<code>instances_separator="<br/>"</code> set for
    readability)<br/> The output pattern must follow <a
    href="http://docs.python.org/library/stdtypes.html#string-formatting-operations">Python
    string formatting</a> syntax. The format must use parenthesized
    notation to map to the subfield code. This currently restricts the
    support of <code>output_pattern</code> to non-repeatable
    subfields</p>

    @param tag: the tag code of the field that is to be printed
    @param instances_separator: a separator between instances of field
    @param subfields_separator: a separator between subfields of an instance
    @param limit: the maximum number of values to display.
    @param extension: a text printed at the end if 'limit' has been exceeded
    @param output_pattern: when specified, prints the subfields of each instance according to pattern specified as parameter (following Python string formatting convention)
    """
    # Check if data or control field
    p_tag = parse_tag(tag)
    if not p_tag[0].isdigit():
        return ''
    if 0 <= int(p_tag[0]) <= 10:
        # Control field (the numeric test also lets tag 000 through)
        return bfo.control_field(tag)

    # Get values without subcode; unneeded subcodes are filtered below.
    # Empty indicators are written '_' in the tag given to bfo.fields().
    ind1 = p_tag[1] or '_'
    ind2 = p_tag[2] or '_'
    values = bfo.fields(p_tag[0] + ind1 + ind2)  # always a list of dicts

    subcode = p_tag[3]
    keep_all_subfields = subcode in ('', '%')

    # Parse the limit once. Anything that is not a plain run of digits
    # (including None) means "no limit"; ints are accepted too.
    limit_text = limit if hasattr(limit, 'isdigit') else str(limit)
    max_values = int(limit_text) if limit_text.isdigit() else None

    shown = 0               # number of subfield values output so far
    limit_exceeded = False  # True once a matching value had to be dropped
    instances_out = []      # Retain each instance output
    for instance in values:
        filtered_values = [value for (code, value) in iteritems(instance)
                           if keep_all_subfields or code == subcode]
        if not filtered_values:
            continue  # no corresponding subcode in this instance

        if max_values is not None:
            remaining = max_values - shown
            if len(filtered_values) > remaining:
                # We are going to exceed the limit: keep only what fits
                # and remember it so that 'extension' gets printed.
                limit_exceeded = True
                filtered_values = filtered_values[:remaining]
                if not filtered_values:
                    break  # do not append empty output

        if output_pattern:
            try:
                instances_out.append(output_pattern %
                                     DictNoKeyError(instance))
            except (KeyError, TypeError, ValueError):
                # Best effort: an instance that cannot be rendered with
                # the given pattern is skipped instead of failing the
                # whole element.
                pass
        else:
            instances_out.append(subfields_separator.join(filtered_values))
        shown += len(filtered_values)

        if limit_exceeded:
            break  # No need to go further

    ext_out = extension if limit_exceeded else ''
    return instances_separator.join(instances_out) + ext_out