Python parse_tag示例，invenio.modules.formatter.utils.parse_tag Python示例

示例#1

0

显示文件

文件： selfcites_indexer.py 项目： k3njiy/invenio

def get_authors_tags():
    """
    Build the mapping of author-related option names to their tags.

    Loads the 'citation' config file, reads the name of the rank method's
    function from the "rank_method" section, then reads each author-related
    option from the section named after that function and converts the
    configured value with tagify(parse_tag(...)).

    @return: dict keyed by 'first_author', 'additional_author',
             'alternative_author_name' and 'collaboration_name'
    """
    config = load_config_file('citation')
    function = config.get("rank_method", "function")

    author_options = (
        'first_author',
        'additional_author',
        'alternative_author_name',
        'collaboration_name',
    )

    # One entry per option, looked up in the order listed above.
    return dict(
        (option, tagify(parse_tag(config.get(function, option))))
        for option in author_options
    )

示例#2

0

显示文件

文件： selfcites_indexer.py 项目： chokribr/invenio-1

def get_authors_tags():
    """
    Return the tags configured for main author, coauthors, alternative
    author names and collaboration names.

    The values come from the 'citation' config file: the section to read is
    the one named by the "function" option of the "rank_method" section.
    Each configured value is passed through parse_tag and then tagify.

    @return: dict mapping each option name to its tagified tag
    """
    citation_config = load_config_file('citation')
    section = citation_config.get("rank_method", "function")

    author_tags = {}
    for option in ('first_author',
                   'additional_author',
                   'alternative_author_name',
                   'collaboration_name'):
        parsed = parse_tag(citation_config.get(section, option))
        author_tags[option] = tagify(parsed)

    return author_tags

示例#3

0

显示文件

文件： engine.py 项目： derekstrom/invenio

    def control_field(self, tag, escape=0):
        """
        Look up the value of the control field `tag` in this object's record.

        :param tag: the marc code of a field
        :param escape: escaping mode handed to escape_field; 0 returns the
                       value unescaped
        @return: value of field tag in record, or '' when get_record()
                 returns None
        """
        if self.get_record() is None:
            # Case where BibRecord could not parse object
            return ''

        p_tag = parse_tag(tag)
        record = self.get_record()
        raw_value = record_get_field_value(record,
                                           p_tag[0],
                                           p_tag[1],
                                           p_tag[2],
                                           p_tag[3])
        if escape == 0:
            return raw_value
        return escape_field(raw_value, escape)

示例#4

0

显示文件

文件： engine.py 项目： derekstrom/invenio

    def fields(self, tag, escape=0, repeatable_subfields_p=False):
        """
        Return the list of values corresponding to "tag".

        If tag has a subcode (such as 999C5b), the function simply returns
        the list of values found for that tag and subcode.
        If tag has no subcode (such as 999C5), the function returns a list
        of dictionaries, one per field instance, whose keys are the
        subcodes and whose values are the values of tag.subcode.
        Eg. for given MARC::
            999C5 $a value_1a $b value_1b
            999C5 $b value_2b
            999C5 $b value_3b $b value_3b_bis

            >>> bfo.fields('999C5b')
            >>> ['value_1b', 'value_2b', 'value_3b', 'value_3b_bis']
            >>> bfo.fields('999C5')
            >>> [{'a':'value_1a', 'b':'value_1b'},
                {'b':'value_2b'},
                {'b':'value_3b'}]

        By default only one value is kept for each subcode of an instance
        (that is, subfields are considered non-repeatable). This is why
        'value_3b_bis' is not shown for bfo.fields('999C5') in the example
        above; callers should not rely on which of value_3b or
        value_3b_bis is returned. This keeps the common case simple, as
        most of the time subfields are not repeatable (each dictionary
        value is a string instead of a list). Repeatable subfields can be
        allowed by setting 'repeatable_subfields_p' to True. In this mode
        the above example would return:
            >>> bfo.fields('999C5b', repeatable_subfields_p=True)
            >>> ['value_1b', 'value_2b', 'value_3b', 'value_3b_bis']
            >>> bfo.fields('999C5', repeatable_subfields_p=True)
            >>> [{'a':['value_1a'], 'b':['value_1b']},
                {'b':['value_2b']},
                {'b':['value_3b', 'value_3b_bis']}]

        NOTICE THAT THE RETURNED STRUCTURE IS DIFFERENT. Also note that
        whatever the value of 'repeatable_subfields_p' is,
        bfo.fields('999C5b') always shows all values, even repeated ones:
        the parameter is only looked at when the tag has no subcode.

        'escape' parameter allows to escape special characters
        of the fields. The value of escape can be:
                      0. No escaping
                      1. Escape all HTML characters
                      2. Remove unsafe HTML tags (Eg. keep <br />)
                      3. Mix of mode 1 and 2. If value of field starts with
                      <!-- HTML -->, then use mode 2. Else use mode 1.
                      4. Remove all HTML tags
                      5. Same as 2, with more tags allowed (like <img>)
                      6. Same as 3, with more tags allowed (like <img>)
                      7. Mix of mode 0 and mode 1. If field_value
                      starts with <!--HTML-->, then use mode 0.
                      Else use mode 1.
                      8. Same as mode 1, but also escape double-quotes
                      9. Same as mode 4, but also escape double-quotes

        :param tag: the marc code of a field
        :param escape: escaping mode (see list above); 0 means no escaping
        :param repeatable_subfields_p: if True, each dictionary maps a
                                       subcode to the list of its values
        @return: values of field tag in record; [] when get_record()
                 returns None
        """
        if self.get_record() is None:
            # Case where BibRecord could not parse object
            return []

        p_tag = parse_tag(tag)
        no_escaping = escape == 0

        if p_tag[3] != "":
            # Subcode has been defined: a flat list of values is enough.
            found = record_get_field_values(self.get_record(),
                                            p_tag[0],
                                            p_tag[1],
                                            p_tag[2],
                                            p_tag[3])
            if no_escaping:
                return found
            return [escape_field(item, escape) for item in found]

        # Subcode is undefined: build one dictionary per field instance.
        # However it might be the case of a control field.
        instances = record_get_field_instances(self.get_record(),
                                               p_tag[0],
                                               p_tag[1],
                                               p_tag[2])

        if not repeatable_subfields_p:
            if no_escaping:
                return [dict(instance[0]) for instance in instances]
            return [dict((subfield[0], escape_field(subfield[1], escape))
                         for subfield in instance[0])
                    for instance in instances]

        # Repeatable mode: gather every value of a subcode into a list.
        list_of_instances = []
        for instance in instances:
            grouped = {}
            for subfield in instance[0]:
                values_for_code = grouped.setdefault(subfield[0], [])
                if no_escaping:
                    values_for_code.append(subfield[1])
                else:
                    values_for_code.append(escape_field(subfield[1], escape))
            list_of_instances.append(grouped)
        return list_of_instances

示例#5

0

显示文件

文件： citation_indexer.py 项目： chokribr/invenio-1

def get_tags_config(config):
    """Collect from `config` the tags needed by the citation indexer.

    Every option is read from the section named by the "function" option of
    the "rank_method" section. A missing option is not an error: the
    corresponding entry of the result is set to None (or, for
    'publication_format', to CFG_JOURNAL_PUBINFO_STANDARD_FORM).

    @param config: config object offering get(section, option)
    @return: dict of tags, keyed by the role each tag plays
    """
    # Probably "citation" unless this file gets renamed
    function = config.get("rank_method", "function")
    write_message("config function %s" % function, verbose=9)

    def optional_tag(option):
        """Return the tagified tag configured for `option`, None if unset."""
        try:
            raw = config.get(function, option)
        except ConfigParser.NoOptionError:
            return None
        return tagify(parse_tag(raw))

    # (key in the returned dict, option name in the config section)
    single_tag_options = (
        # 037a: contains (often) the "hep-ph/0501084" tag of THIS record
        ('record_pri_number', "primary_report_number"),
        # 088a: additional short identifier for the record
        ('record_add_number', "additional_report_number"),
        # 999C5r. this is in the reference list, refers to other records.
        # Looks like: hep-ph/0408002
        ('refs_report_number', "reference_via_report_number"),
        # 999C5s. this is in the reference list, refers to other records.
        # Looks like: Phys.Rev.,A21,78
        ('refs_journal', "reference_via_pubinfo"),
        # 999C5a. this is in the reference list, refers to other records.
        # Looks like: 10.1007/BF03170733
        ('refs_doi', "reference_via_doi"),
        # 999C50. this is in the reference list, refers to other records.
        # Looks like: 1205
        ('refs_record_id', "reference_via_record_id"),
        # 999C5i. this is in the reference list, refers to other records.
        # Looks like: 9781439520031
        ('refs_isbn', "reference_via_isbn"),
    )

    tags = {}
    for key, option in single_tag_options:
        tags[key] = optional_tag(option)

    # Fields needed to construct the journals for this record.
    # All four options must be present, otherwise 'publication' is None.
    journal_options = (
        ('pages', "pubinfo_journal_page"),
        ('year', "pubinfo_journal_year"),
        ('journal', "pubinfo_journal_title"),
        ('volume', "pubinfo_journal_volume"),
    )
    try:
        raw_parts = [(part, config.get(function, option))
                     for part, option in journal_options]
    except ConfigParser.NoOptionError:
        tags['publication'] = None
    else:
        tags['publication'] = dict((part, tagify(parse_tag(raw)))
                                   for part, raw in raw_parts)

    # Fields needed to lookup the DOIs
    tags['doi'] = get_field_tags('doi')

    # Fields needed to lookup the ISBN
    tags['isbn'] = get_field_tags('isbn')

    # 999C5s. A standardized way of writing a reference in the reference list.
    # Like: Nucl. Phys. B 710 (2000) 371
    try:
        publication_format = config.get(function, "pubinfo_journal_format")
    except ConfigParser.NoOptionError:
        publication_format = CFG_JOURNAL_PUBINFO_STANDARD_FORM
    tags['publication_format'] = publication_format

    # Print values of tags for debugging
    write_message("tag values: %r" % [tags], verbose=9)

    return tags

示例#6

0

显示文件

文件： citation_indexer.py 项目： jirikuncar/invenio

def get_tags_config(config):
    """Read the tags the citation indexer needs from `config`.

    The section to read is named by the "function" option of the
    "rank_method" section. Options that are not set do not raise: their
    entry in the result is None, except "publication_format" which falls
    back to CFG_JOURNAL_PUBINFO_STANDARD_FORM.

    @param config: config object offering get(section, option)
    @return: dict of tags, keyed by the role each tag plays
    """
    # Probably "citation" unless this file gets renamed
    function = config.get("rank_method", "function")
    write_message("config function %s" % function, verbose=9)

    # Pairs of (key in the returned dict, option name in the section).
    optional_tags = [
        # 037a: contains (often) the "hep-ph/0501084" tag of THIS record
        ("record_pri_number", "primary_report_number"),
        # 088a: additional short identifier for the record
        ("record_add_number", "additional_report_number"),
        # 999C5r. this is in the reference list, refers to other records.
        # Looks like: hep-ph/0408002
        ("refs_report_number", "reference_via_report_number"),
        # 999C5s. this is in the reference list, refers to other records.
        # Looks like: Phys.Rev.,A21,78
        ("refs_journal", "reference_via_pubinfo"),
        # 999C5a. this is in the reference list, refers to other records.
        # Looks like: 10.1007/BF03170733
        ("refs_doi", "reference_via_doi"),
        # 999C50. this is in the reference list, refers to other records.
        # Looks like: 1205
        ("refs_record_id", "reference_via_record_id"),
        # 999C5i. this is in the reference list, refers to other records.
        # Looks like: 9781439520031
        ("refs_isbn", "reference_via_isbn"),
    ]

    tags = {}
    for key, option in optional_tags:
        try:
            raw_tag = config.get(function, option)
        except ConfigParser.NoOptionError:
            tags[key] = None
        else:
            tags[key] = tagify(parse_tag(raw_tag))

    # Fields needed to construct the journals for this record.
    # A single missing option disables the whole "publication" entry.
    try:
        pages = config.get(function, "pubinfo_journal_page")
        year = config.get(function, "pubinfo_journal_year")
        journal = config.get(function, "pubinfo_journal_title")
        volume = config.get(function, "pubinfo_journal_volume")
    except ConfigParser.NoOptionError:
        tags["publication"] = None
    else:
        tags["publication"] = {
            "pages": tagify(parse_tag(pages)),
            "year": tagify(parse_tag(year)),
            "journal": tagify(parse_tag(journal)),
            "volume": tagify(parse_tag(volume)),
        }

    # Fields needed to lookup the DOIs
    tags["doi"] = get_field_tags("doi")

    # Fields needed to lookup the ISBN
    tags["isbn"] = get_field_tags("isbn")

    # 999C5s. A standardized way of writing a reference in the reference list.
    # Like: Nucl. Phys. B 710 (2000) 371
    try:
        journal_format = config.get(function, "pubinfo_journal_format")
    except ConfigParser.NoOptionError:
        journal_format = CFG_JOURNAL_PUBINFO_STANDARD_FORM
    tags["publication_format"] = journal_format

    # Print values of tags for debugging
    write_message("tag values: %r" % [tags], verbose=9)

    return tags

示例#7

0

显示文件

文件： bfe_field.py 项目： mhellmic/b2share

def format_element(bfo, tag, limit, instances_separator=" ",
           subfields_separator=" ", extension="", output_pattern=""):
    """
    Prints the given field of a record.
    If tag is in range [001, 010], this element assumes
    that it accesses a control field. Else it considers it
    accesses a data field.

    <p>For eg. consider the following metadata:
    <pre>
 100__ $$aCalatroni, S$$uCERN
 245__ $$aStatus of the EP Simulations and Facilities for the SPL
 700__ $$aFerreira, L$$uCERN
 700__ $$aMacatrao, M$$uCERN
 700__ $$aSkala, A$$uCERN
 700__ $$aSosin, M$$uCERN
 700__ $$ade Waele, R$$uCERN
 700__ $$aWithofs, Y$$uKHLim, Diepenbeek
    </pre>
    The following calls to bfe_field would print:
    <pre>
    &lt;BFE_FIELD tag="700" instances_separator="&lt;br/>" subfields_separator=" - ">

    Ferreira, L - CERN
    Macatrao, M - CERN
    Skala, A - CERN
    Sosin, M - CERN
    de Waele, R - CERN
    Withofs, Y - KHLim, Diepenbeek
    </pre>
    </p>

    <p>For more advanced formatting, the <code>output_pattern</code>
    parameter can be used to output the subfields of each instance in
    the specified way. For eg. consider the following metadata:
    <pre>
 775__ $$b15. Aufl.$$c1995-1996$$nv.1$$pGrundlagen und Werkstoffe$$w317999
 775__ $$b12. Aufl.$$c1963$$w278898
 775__ $$b14. Aufl.$$c1983$$w107899
 775__ $$b13. Aufl.$$c1974$$w99635
    </pre>
    with the following <code>output_pattern</code>:

    <pre>
    &lt;a href="/record/%(w)s">%(b)s (%(c)s) %(n)s %(p)s&lt;/a>
    </pre>
    would print:<br/>

    <a href="/record/317999">15. Aufl. (1995-1996) v.1 Grundlagen und Werkstoffe</a><br/>
    <a href="/record/278898">12. Aufl. (1963) </a><br/>
    <a href="/record/107899">14. Aufl. (1983) </a><br/>
    <a href="/record/99635">13. Aufl. (1974) </a>

    <br/>(<code>instances_separator="&lt;br/>"</code> set for
    readability)<br/> The output pattern must follow <a
    href="http://docs.python.org/library/stdtypes.html#string-formatting-operations">Python
    string formatting</a> syntax. The format must use parenthesized
    notation to map to the subfield code. This currently restricts the
    support of <code>output_pattern</code> to non-repeatable
    subfields</p>

    @param tag: the tag code of the field that is to be printed
    @param instances_separator: a separator between instances of field
    @param subfields_separator: a separator between subfields of an instance
    @param limit: the maximum number of values to display.
    @param extension: a text printed at the end if 'limit' has been exceeded
    @param output_pattern: when specified, prints the subfields of each instance according to pattern specified as parameter (following Python string formatting convention)
    """
    # Check if data or control field
    p_tag = parse_tag(tag)
    if not p_tag[0].isdigit():
        return ''
    if int(p_tag[0]) in range(0, 11):
        return bfo.control_field(tag)

    # Get values without subcode; unneeded subcodes are filtered below.
    # Empty indicators are written as '_' in the tag given to bfo.fields.
    if p_tag[1] == '':
        p_tag[1] = '_'
    if p_tag[2] == '':
        p_tag[2] = '_'
    # Values will always be a list of dicts
    values = bfo.fields(p_tag[0] + p_tag[1] + p_tag[2])

    wanted_subcode = p_tag[3]
    any_subcode = wanted_subcode in ('', '%')
    # A non-numeric limit (e.g. empty string) means "no limit".
    max_values = int(limit) if limit.isdigit() else None

    x = 0  # Number of values accounted for so far
    limit_exceeded = False  # True once at least one value had to be dropped
    instances_out = []  # Retain each instance output
    for instance in values:
        filtered_values = [value for (subcode, value) in iteritems(instance)
                           if any_subcode or wanted_subcode == subcode]
        if not filtered_values:
            continue

        # We have found some corresponding subcode(s)
        if max_values is not None and x + len(filtered_values) > max_values:
            # Some values do not fit: remember it so that 'extension' is
            # printed, and keep only the values that are still allowed.
            limit_exceeded = True
            filtered_values = filtered_values[:max_values - x]
            if not filtered_values:
                break  # do not append empty list!

        if output_pattern:
            try:
                instances_out.append(output_pattern % DictNoKeyError(instance))
            except (KeyError, TypeError, ValueError):
                # Best effort: an instance that cannot be rendered with
                # the pattern is skipped (its values still count in x).
                pass
        else:
            instances_out.append(subfields_separator.join(filtered_values))
        x += len(filtered_values)

        if limit_exceeded:
            break  # No need to go further

    ext_out = extension if limit_exceeded else ''

    return instances_separator.join(instances_out) + ext_out

示例#8

0

显示文件

def format_element(bfo, tag, limit, instances_separator=" ",
           subfields_separator=" ", extension="", output_pattern=""):
    """
    Prints the given field of a record.
    If tag is in range [001, 010], this element assumes
    that it accesses a control field. Else it considers it
    accesses a data field.

    <p>For eg. consider the following metadata:
    <pre>
 100__ $$aCalatroni, S$$uCERN
 245__ $$aStatus of the EP Simulations and Facilities for the SPL
 700__ $$aFerreira, L$$uCERN
 700__ $$aMacatrao, M$$uCERN
 700__ $$aSkala, A$$uCERN
 700__ $$aSosin, M$$uCERN
 700__ $$ade Waele, R$$uCERN
 700__ $$aWithofs, Y$$uKHLim, Diepenbeek
    </pre>
    The following calls to bfe_field would print:
    <pre>
    &lt;BFE_FIELD tag="700" instances_separator="&lt;br/>" subfields_separator=" - ">

    Ferreira, L - CERN
    Macatrao, M - CERN
    Skala, A - CERN
    Sosin, M - CERN
    de Waele, R - CERN
    Withofs, Y - KHLim, Diepenbeek
    </pre>
    </p>

    <p>For more advanced formatting, the <code>output_pattern</code>
    parameter can be used to output the subfields of each instance in
    the specified way. For eg. consider the following metadata:
    <pre>
 775__ $$b15. Aufl.$$c1995-1996$$nv.1$$pGrundlagen und Werkstoffe$$w317999
 775__ $$b12. Aufl.$$c1963$$w278898
 775__ $$b14. Aufl.$$c1983$$w107899
 775__ $$b13. Aufl.$$c1974$$w99635
    </pre>
    with the following <code>output_pattern</code>:

    <pre>
    &lt;a href="/record/%(w)s">%(b)s (%(c)s) %(n)s %(p)s&lt;/a>
    </pre>
    would print:<br/>

    <a href="/record/317999">15. Aufl. (1995-1996) v.1 Grundlagen und Werkstoffe</a><br/>
    <a href="/record/278898">12. Aufl. (1963) </a><br/>
    <a href="/record/107899">14. Aufl. (1983) </a><br/>
    <a href="/record/99635">13. Aufl. (1974) </a>

    <br/>(<code>instances_separator="&lt;br/>"</code> set for
    readability)<br/> The output pattern must follow <a
    href="http://docs.python.org/library/stdtypes.html#string-formatting-operations">Python
    string formatting</a> syntax. The format must use parenthesized
    notation to map to the subfield code. This currently restricts the
    support of <code>output_pattern</code> to non-repeatable
    subfields</p>

    @param tag: the tag code of the field that is to be printed
    @param instances_separator: a separator between instances of field
    @param subfields_separator: a separator between subfields of an instance
    @param limit: the maximum number of values to display.
    @param extension: a text printed at the end if 'limit' has been exceeded
    @param output_pattern: when specified, prints the subfields of each instance according to pattern specified as parameter (following Python string formatting convention)
    """
    # Check if data or control field
    p_tag = parse_tag(tag)
    if not p_tag[0].isdigit():
        return ''
    if int(p_tag[0]) in range(0, 11):
        return bfo.control_field(tag)

    # Get values without subcode; unneeded subcodes are filtered below.
    # Empty indicators are written as '_' in the tag given to bfo.fields.
    if p_tag[1] == '':
        p_tag[1] = '_'
    if p_tag[2] == '':
        p_tag[2] = '_'
    # Values will always be a list of dicts
    values = bfo.fields(p_tag[0] + p_tag[1] + p_tag[2])

    wanted_subcode = p_tag[3]
    any_subcode = wanted_subcode in ('', '%')
    # A non-numeric limit (e.g. empty string) means "no limit".
    max_values = int(limit) if limit.isdigit() else None

    x = 0  # Number of values accounted for so far
    limit_exceeded = False  # True once at least one value had to be dropped
    instances_out = []  # Retain each instance output
    for instance in values:
        filtered_values = [value for (subcode, value) in iteritems(instance)
                           if any_subcode or wanted_subcode == subcode]
        if not filtered_values:
            continue

        # We have found some corresponding subcode(s)
        if max_values is not None and x + len(filtered_values) > max_values:
            # Some values do not fit: remember it so that 'extension' is
            # printed, and keep only the values that are still allowed.
            limit_exceeded = True
            filtered_values = filtered_values[:max_values - x]
            if not filtered_values:
                break  # do not append empty list!

        if output_pattern:
            try:
                instances_out.append(output_pattern % DictNoKeyError(instance))
            except (KeyError, TypeError, ValueError):
                # Best effort: an instance that cannot be rendered with
                # the pattern is skipped (its values still count in x).
                pass
        else:
            instances_out.append(subfields_separator.join(filtered_values))
        x += len(filtered_values)

        if limit_exceeded:
            break  # No need to go further

    ext_out = extension if limit_exceeded else ''

    return instances_separator.join(instances_out) + ext_out