def format_element(bfo, name, tag_name='', tag='', kb='', kb_default_output='',
                   var='', protocol='googlescholar'):
    """Prints a custom field in a way suitable to be used in HTML META tags.

    In particular conforms to Google Scholar harvesting protocol as
    defined http://scholar.google.com/intl/en/scholar/inclusion.html and
    Open Graph http://ogp.me/

    Returns one meta tag per distinct value, joined by newlines, or an
    empty string when the protocol is switched off or no value is found.

    @param tag_name: the name, from tag table, of the field to be exported
                     looks initially for names prefixed by "meta-"<tag_name>
                     then looks for exact name, then falls through to "tag"
    @param tag: the MARC tag to be exported (only if not defined by tag_name)
    @param name: name to be displayed in the meta headers, labelling this value.
    @param kb: a knowledge base through which to process the retrieved value if necessary.
    @param kb_default_output: when a '<code>kb</code>' is specified and no match for value is found, what shall we
               return? Either return the given parameter or specify "{value}" to return the retrieved
               value before processing through kb.
    @param var: the name of a variable to output instead of field from metadata.
                Allowed values are those supported by bfe_server_info and
                bfe_client_info. Takes precedence over <code>tag_name</code> and <code>tag</code>
    @param protocol: the protocol this tag is aimed at. Can be used to switch on/off support for a given "protocol". Can take values among 'googlescholar', 'opengraph'
    @see: bfe_server_info.py, bfe_client_info.py
    """
    if protocol == 'googlescholar' and not CFG_WEBSEARCH_ENABLE_GOOGLESCHOLAR:
        return ""
    elif protocol == 'opengraph' and not CFG_WEBSEARCH_ENABLE_OPENGRAPH:
        return ""

    tags = []
    if var:
        # delegate to bfe_server_info or bfe_client_info:
        value = server_info(bfo, var)
        if value.startswith("Unknown variable: "):
            # Oops variable was not defined there
            value = client_info(bfo, var)
        if value.startswith("Unknown variable: "):
            return ""
        return create_metatag(name=name, content=cgi.escape(value, True)) or ""
    elif tag_name:
        # First check for special meta named tags
        tags = get_field_tags("meta-" + tag_name)
        if not tags:
            # then check for regular tags
            tags = get_field_tags(tag_name)

    if not tags and tag:
        # fall back to explicit marc tag
        tags = [tag]

    if not tags:
        return ''

    # Flatten whatever bfo.fields() handed back (plain values, dicts of
    # subfields, or lists of either) into a single list of values.
    out = []
    values = [bfo.fields(marctag, escape=9) for marctag in tags]
    for value in values:
        if isinstance(value, list):
            for val in value:
                if isinstance(val, dict):
                    out.extend(val.values())
                else:
                    out.append(val)
        elif isinstance(value, dict):
            out.extend(value.values())
        else:
            out.append(value)

    # Remove duplicates while keeping first-seen order, so that the meta
    # tags follow the order of the record and the result stays a real list.
    seen = set()
    unique = []
    for item in out:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    out = unique

    if name == 'citation_date':
        out = [item.replace('-', '/') for item in out]

    if kb:
        if kb_default_output == "{value}":
            # Fall back to the untouched value when the kb has no mapping
            out = [bfo.kb(kb, value, value) for value in out]
        else:
            out = [bfo.kb(kb, value, kb_default_output) for value in out]

    return '\n'.join(
        [create_metatag(name=name, content=value) for value in out])
def format_element(bfo, name, tag_name='', tag='', kb='', kb_default_output='',
                   var='', protocol='googlescholar'):
    """Prints a custom field in a way suitable to be used in HTML META tags.

    In particular conforms to Google Scholar harvesting protocol as
    defined http://scholar.google.com/intl/en/scholar/inclusion.html and
    Open Graph http://ogp.me/

    Returns one meta tag per distinct value, joined by newlines, or an
    empty string when the protocol is switched off or no value is found.

    @param tag_name: the name, from tag table, of the field to be exported
                     looks initially for names prefixed by "meta-"<tag_name>
                     then looks for exact name, then falls through to "tag"
    @param tag: the MARC tag to be exported (only if not defined by tag_name). Comma-separated list of tags.
    @param name: name to be displayed in the meta headers, labelling this value.
    @param kb: a knowledge base through which to process the retrieved value if necessary.
    @param kb_default_output: when a '<code>kb</code>' is specified and no match for value is found, what shall we
               return? Either return the given parameter or specify "{value}" to return the retrieved
               value before processing through kb.
    @param var: the name of a variable to output instead of field from metadata.
                Allowed values are those supported by bfe_server_info and
                bfe_client_info. Takes precedence over <code>tag_name</code> and <code>tag</code>
    @param protocol: the protocol this tag is aimed at. Can be used to switch on/off support for a given "protocol". Can take values among 'googlescholar', 'opengraph'
    @see: bfe_server_info.py, bfe_client_info.py
    """
    if protocol == 'googlescholar' and not CFG_WEBSEARCH_ENABLE_GOOGLESCHOLAR:
        return ""
    elif protocol == 'opengraph' and not CFG_WEBSEARCH_ENABLE_OPENGRAPH:
        return ""

    matched_by_tag_name_p = False
    tags = []
    if var:
        # delegate to bfe_server_info or bfe_client_info:
        value = server_info(bfo, var)
        if value.startswith("Unknown variable: "):
            # Oops variable was not defined there
            value = client_info(bfo, var)
        if value.startswith("Unknown variable: "):
            return ""
        return create_metatag(name=name, content=cgi.escape(value, True)) or ""
    elif tag_name:
        # First check for special meta named tags
        tags = get_field_tags("meta-" + tag_name)
        if not tags:
            # then check for regular tags
            tags = get_field_tags(tag_name)
        matched_by_tag_name_p = bool(tags)

    if not tags and tag:
        # fall back to explicit marc tag
        if ',' in tag:
            # Tolerate "100__a, 700__a" and stray commas
            tags = [piece.strip() for piece in tag.split(',') if piece.strip()]
        else:
            tags = [tag]

    if not tags:
        return ''

    out = []

    if protocol == 'googlescholar' and \
            (tags == ['100__a'] or tags == ['700__a']):
        # Authors for Google Scholar: remove names that are not purely
        # author (thesis director, coordinator, etc). Assume that
        # existence of $e subfield is a sign. Since this assumption
        # might be wrong, put some strong conditions in order to get
        # into this branch, with easy way to bypass.
        subfield = tags[0][-1]
        values = [field_instance[subfield]
                  for field_instance in bfo.fields(tags[0][:-1], escape=9)
                  if 'e' not in field_instance and subfield in field_instance]
    else:
        # Standard fetching of values
        values = [bfo.fields(marctag, escape=9) for marctag in tags]

    if name == 'citation_dissertation_institution':
        if CFG_CERN_SITE and 'THESIS' in bfo.fields('980__a'):
            # Copy before extending so the list returned by bfo.fields()
            # is never modified in place.
            authors = list(bfo.fields('100__', escape=9))
            authors.extend(bfo.fields('700__', escape=9))
            values = [field_instance['u'] for field_instance in authors
                      if 'e' not in field_instance and 'u' in field_instance]
        elif tag == '100__u' and not matched_by_tag_name_p:
            # TODO: find way to map correctly this tag
            values = []

    # Flatten plain values, dicts of subfields and lists of either into
    # a single list of values.
    for value in values:
        if isinstance(value, list):
            for val in value:
                if isinstance(val, dict):
                    out.extend(val.values())
                else:
                    out.append(val)
        elif isinstance(value, dict):
            out.extend(value.values())
        else:
            out.append(value)

    if name == 'citation_title' and out:
        # Only build a joined title when there is something to join, so
        # that no meta tag with empty content is produced.
        out = [' : '.join(out)]

    if name == 'citation_date':
        out = [item.replace('-', '/') for item in out]
    elif name == 'citation_publication_date':
        for candidate in out:
            # Stop at first match
            parsed_date = parse_date_for_googlescholar(candidate)
            if parsed_date:
                out = [parsed_date]
                break

    # Remove duplicates while keeping first-seen order, so that the meta
    # tags follow the order of the record and the result stays a real list.
    seen = set()
    unique = []
    for item in out:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    out = unique

    if kb:
        if kb_default_output == "{value}":
            # Fall back to the untouched value when the kb has no mapping
            out = [bfo.kb(kb, value, value) for value in out]
        else:
            out = [bfo.kb(kb, value, kb_default_output) for value in out]

    return '\n'.join(
        [create_metatag(name=name, content=value) for value in out])
def format_element(bfo, name, tag_name='', tag='', kb='', kb_default_output='',
                   var='', protocol='googlescholar'):
    """Prints a custom field in a way suitable to be used in HTML META tags.

    In particular conforms to Google Scholar harvesting protocol as
    defined http://scholar.google.com/intl/en/scholar/inclusion.html and
    Open Graph http://ogp.me/

    Returns one meta tag per distinct value, joined by newlines, or an
    empty string when the protocol is switched off or no value is found.

    @param tag_name: the name, from tag table, of the field to be exported
                     looks initially for names prefixed by "meta-"<tag_name>
                     then looks for exact name, then falls through to "tag"
    @param tag: the MARC tag to be exported (only if not defined by tag_name). Comma-separated list of tags.
    @param name: name to be displayed in the meta headers, labelling this value.
    @param kb: a knowledge base through which to process the retrieved value if necessary.
    @param kb_default_output: when a '<code>kb</code>' is specified and no match for value is found, what shall we
               return? Either return the given parameter or specify "{value}" to return the retrieved
               value before processing through kb.
    @param var: the name of a variable to output instead of field from metadata.
                Allowed values are those supported by bfe_server_info and
                bfe_client_info. Takes precedence over <code>tag_name</code> and <code>tag</code>
    @param protocol: the protocol this tag is aimed at. Can be used to switch on/off support for a given "protocol". Can take values among 'googlescholar', 'opengraph'
    @see: bfe_server_info.py, bfe_client_info.py
    """
    if protocol == 'googlescholar' and not CFG_WEBSEARCH_ENABLE_GOOGLESCHOLAR:
        return ""
    elif protocol == 'opengraph' and not CFG_WEBSEARCH_ENABLE_OPENGRAPH:
        return ""

    matched_by_tag_name_p = False
    tags = []
    if var:
        # delegate to bfe_server_info or bfe_client_info:
        value = server_info(bfo, var)
        if value.startswith("Unknown variable: "):
            # Oops variable was not defined there
            value = client_info(bfo, var)
        if value.startswith("Unknown variable: "):
            return ""
        return create_metatag(name=name, content=cgi.escape(value, True)) or ""
    elif tag_name:
        # First check for special meta named tags
        tags = get_field_tags("meta-" + tag_name)
        if not tags:
            # then check for regular tags
            tags = get_field_tags(tag_name)
        matched_by_tag_name_p = bool(tags)

    if not tags and tag:
        # fall back to explicit marc tag
        if ',' in tag:
            # Tolerate "100__a, 700__a" and stray commas
            tags = [piece.strip() for piece in tag.split(',') if piece.strip()]
        else:
            tags = [tag]

    if not tags:
        return ''

    out = []

    if protocol == 'googlescholar' and \
            (tags == ['100__a'] or tags == ['700__a']):
        # Authors for Google Scholar: remove names that are not purely
        # author (thesis director, coordinator, etc). Assume that
        # existence of $e subfield is a sign. Since this assumption
        # might be wrong, put some strong conditions in order to get
        # into this branch, with easy way to bypass.
        subfield = tags[0][-1]
        # Skip instances lacking the requested subfield instead of
        # raising KeyError on them.
        values = [field_instance[subfield]
                  for field_instance in bfo.fields(tags[0][:-1], escape=9)
                  if 'e' not in field_instance and subfield in field_instance]
    else:
        # Standard fetching of values
        values = [bfo.fields(marctag, escape=9) for marctag in tags]

    if name == 'citation_dissertation_institution':
        if CFG_CERN_SITE and 'THESIS' in bfo.fields('980__a'):
            # Copy before extending so the list returned by bfo.fields()
            # is never modified in place.
            authors = list(bfo.fields('100__', escape=9))
            authors.extend(bfo.fields('700__', escape=9))
            values = [field_instance['u'] for field_instance in authors
                      if 'e' not in field_instance and 'u' in field_instance]
        elif tag == '100__u' and not matched_by_tag_name_p:
            # TODO: find way to map correctly this tag
            values = []

    # Flatten plain values, dicts of subfields and lists of either into
    # a single list of values.
    for value in values:
        if isinstance(value, list):
            for val in value:
                if isinstance(val, dict):
                    out.extend(val.values())
                else:
                    out.append(val)
        elif isinstance(value, dict):
            out.extend(value.values())
        else:
            out.append(value)

    if name == 'citation_date':
        out = [item.replace('-', '/') for item in out]
    elif name == 'citation_publication_date':
        for candidate in out:
            # Stop at first match
            parsed_date = parse_date_for_googlescholar(candidate)
            if parsed_date:
                out = [parsed_date]
                break

    # Remove duplicates while keeping first-seen order, so that the meta
    # tags follow the order of the record and the result stays a real list.
    seen = set()
    unique = []
    for item in out:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    out = unique

    if kb:
        if kb_default_output == "{value}":
            # Fall back to the untouched value when the kb has no mapping
            out = [bfo.kb(kb, value, value) for value in out]
        else:
            out = [bfo.kb(kb, value, kb_default_output) for value in out]

    return '\n'.join(
        [create_metatag(name=name, content=value) for value in out])
def format_element(bfo, name, tag_name='', tag='', kb='', kb_default_output='',
                   var='', protocol='googlescholar'):
    """Prints a custom field in a way suitable to be used in HTML META tags.

    In particular conforms to Google Scholar harvesting protocol as
    defined http://scholar.google.com/intl/en/scholar/inclusion.html and
    Open Graph http://ogp.me/

    Returns one meta tag per distinct value, joined by newlines, or an
    empty string when the protocol is switched off or no value is found.

    @param tag_name: the name, from tag table, of the field to be exported
                     looks initially for names prefixed by "meta-"<tag_name>
                     then looks for exact name, then falls through to "tag"
    @param tag: the MARC tag to be exported (only if not defined by tag_name)
    @param name: name to be displayed in the meta headers, labelling this value.
    @param kb: a knowledge base through which to process the retrieved value if necessary.
    @param kb_default_output: when a '<code>kb</code>' is specified and no match for value is found, what shall we
               return? Either return the given parameter or specify "{value}" to return the retrieved
               value before processing through kb.
    @param var: the name of a variable to output instead of field from metadata.
                Allowed values are those supported by bfe_server_info and
                bfe_client_info. Takes precedence over <code>tag_name</code> and <code>tag</code>
    @param protocol: the protocol this tag is aimed at. Can be used to switch on/off support for a given "protocol". Can take values among 'googlescholar', 'opengraph'
    @see: bfe_server_info.py, bfe_client_info.py
    """
    if protocol == 'googlescholar' and not CFG_WEBSEARCH_ENABLE_GOOGLESCHOLAR:
        return ""
    elif protocol == 'opengraph' and not CFG_WEBSEARCH_ENABLE_OPENGRAPH:
        return ""

    tags = []
    if var:
        # delegate to bfe_server_info or bfe_client_info:
        value = server_info(bfo, var)
        if value.startswith("Unknown variable: "):
            # Oops variable was not defined there
            value = client_info(bfo, var)
        if value.startswith("Unknown variable: "):
            return ""
        return create_metatag(name=name, content=cgi.escape(value, True)) or ""
    elif tag_name:
        # First check for special meta named tags
        tags = get_field_tags("meta-" + tag_name)
        if not tags:
            # then check for regular tags
            tags = get_field_tags(tag_name)

    if not tags and tag:
        # fall back to explicit marc tag
        tags = [tag]

    if not tags:
        return ''

    # Flatten plain values, dicts of subfields and lists of either into
    # a single list of values. Each value is added exactly once here: the
    # former extra "out += [... for val in value]" pass re-added every
    # item and, for dict instances, appended unhashable lists that broke
    # the de-duplication step below.
    out = []
    values = [bfo.fields(marctag, escape=9) for marctag in tags]
    for value in values:
        if isinstance(value, list):
            for val in value:
                if isinstance(val, dict):
                    out.extend(val.values())
                else:
                    out.append(val)
        elif isinstance(value, dict):
            out.extend(value.values())
        else:
            out.append(value)

    # Remove duplicates while keeping first-seen order, so that the meta
    # tags follow the order of the record and the result stays a real list.
    seen = set()
    unique = []
    for item in out:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    out = unique

    if name == 'citation_date':
        out = [item.replace('-', '/') for item in out]

    if kb:
        if kb_default_output == "{value}":
            # Fall back to the untouched value when the kb has no mapping
            out = [bfo.kb(kb, value, value) for value in out]
        else:
            out = [bfo.kb(kb, value, kb_default_output) for value in out]

    return '\n'.join(
        [create_metatag(name=name, content=value) for value in out])