Пример #1
0
def calculate_citation_history_coordinates(recid):
    """Return a list of citation graph coordinates for RECID, sorted by year.

    The x axis is made of the years returned by
    calculate_citation_graphe_x_coordinates(recid); the y value of each
    year is the number of records citing RECID (as returned by
    get_cited_by) whose year could be parsed and falls on the x axis.

    @param recid: identifier of the record whose citation history is built.
    @return: list of (year, citation count) tuples sorted by year, or an
             empty list when there are fewer than
             CFG_BIBRANK_CITATION_HISTORY_MIN_X_POINTS points on the x axis.
    """
    result = dict.fromkeys(calculate_citation_graphe_x_coordinates(recid), 0)

    if len(result) < CFG_BIBRANK_CITATION_HISTORY_MIN_X_POINTS:
        # do not generate graphs that have less than X points
        return []

    # Use a dedicated loop variable so the RECID argument is not shadowed.
    for citing_recid in get_cited_by(recid):
        rec_date = get_record_year(citing_recid)
        # Some records simply do not have these fields
        if not rec_date:
            continue
        # Maybe rec_date[0][0:4] has a typo and cannot be parsed as a year
        try:
            d = strptime(rec_date[0][:4], '%Y')
        except ValueError:
            continue
        # Citations dated outside the x axis are ignored.
        if d.year in result:
            result[d.year] += 1

    # items() gives the same sorted result on Python 2 and also works on
    # Python 3, where iteritems() no longer exists.
    return sorted(result.items())
Пример #2
0
def parse_date_for_googlescholar(datetime_string):
    """
    Parse (guess) and return the date in a format adequate for Google
    Scholar. We don't use dateutils.guess_datetime() as this one might
    lead to results not accurate enough.

    The input is first normalised: every match of
    CFG_PUNCTUATION_PATTERN_RE, then of CFG_SPACES_PATTERN_RE, is replaced
    by a single space. The result is tried against each format of
    CFG_POSSIBLE_DATE_FORMATS; if none matches, the months matched by
    CFG_MONTHS_I18N_PATTERN_RE are translated through
    CFG_MONTH_NAMES_MAPPING and the formats are tried once more.

    @param datetime_string: the raw date string to parse.
    @return: the date formatted as 'YYYY/MM/DD' when one of the known
             formats matches; otherwise group 3 of the first
             CFG_YEAR_PATTERN_RE match found in the normalised string;
             otherwise the empty string.
    """
    datetime_string = CFG_PUNCTUATION_PATTERN_RE.sub(' ', datetime_string)
    datetime_string = CFG_SPACES_PATTERN_RE.sub(' ', datetime_string)

    def replace_month(match_obj):
        "Return translated month in the matching object"
        month = match_obj.group(2).strip()
        # Unknown month names are left as they are.
        return match_obj.group(1) + \
               CFG_MONTH_NAMES_MAPPING.get(month.upper(), month) + \
               match_obj.group(3)

    def parse_with_known_formats(candidate):
        "Return CANDIDATE parsed with the first matching format, else None"
        candidate = candidate.strip()
        for dateformat in CFG_POSSIBLE_DATE_FORMATS:
            try:
                return strptime(candidate, dateformat)
            except ValueError:
                # Format does not match: try the next one.
                continue
        return None

    parsed_datetime = parse_with_known_formats(datetime_string)
    if not parsed_datetime:
        # Do it all again, with the translated version of the string
        parsed_datetime = parse_with_known_formats(
            CFG_MONTHS_I18N_PATTERN_RE.sub(replace_month, datetime_string))

    if parsed_datetime:
        return strftime('%Y/%m/%d', parsed_datetime)

    # Look for a year inside the string:
    try:
        return CFG_YEAR_PATTERN_RE.search(datetime_string).group(3)
    except (AttributeError, IndexError):
        # AttributeError: search() found nothing and returned None.
        # IndexError: the pattern does not define a third group.
        return ''
Пример #3
0
def parse_date_for_googlescholar(datetime_string):
    """
    Parse (guess) and return the date in a format adequate for Google
    Scholar. We don't use dateutils.guess_datetime() as this one might
    lead to results not accurate enough.

    The input is first normalised: every match of
    CFG_PUNCTUATION_PATTERN_RE, then of CFG_SPACES_PATTERN_RE, is replaced
    by a single space. The result is tried against each format of
    CFG_POSSIBLE_DATE_FORMATS; if none matches, the months matched by
    CFG_MONTHS_I18N_PATTERN_RE are translated through
    CFG_MONTH_NAMES_MAPPING and the formats are tried once more.

    @param datetime_string: the raw date string to parse.
    @return: the date formatted as 'YYYY/MM/DD' when one of the known
             formats matches; otherwise group 3 of the first
             CFG_YEAR_PATTERN_RE match found in the normalised string;
             otherwise the empty string.
    """
    datetime_string = CFG_PUNCTUATION_PATTERN_RE.sub(' ', datetime_string)
    datetime_string = CFG_SPACES_PATTERN_RE.sub(' ', datetime_string)

    def replace_month(match_obj):
        "Return translated month in the matching object"
        month = match_obj.group(2).strip()
        # Unknown month names are left as they are.
        return match_obj.group(1) + \
               CFG_MONTH_NAMES_MAPPING.get(month.upper(), month) + \
               match_obj.group(3)

    def parse_with_known_formats(candidate):
        "Return CANDIDATE parsed with the first matching format, else None"
        candidate = candidate.strip()
        for dateformat in CFG_POSSIBLE_DATE_FORMATS:
            try:
                return strptime(candidate, dateformat)
            except ValueError:
                # Format does not match: try the next one.
                continue
        return None

    parsed_datetime = parse_with_known_formats(datetime_string)
    if not parsed_datetime:
        # Do it all again, with the translated version of the string
        parsed_datetime = parse_with_known_formats(
            CFG_MONTHS_I18N_PATTERN_RE.sub(replace_month, datetime_string))

    if parsed_datetime:
        return strftime('%Y/%m/%d', parsed_datetime)

    # Look for a year inside the string:
    try:
        return CFG_YEAR_PATTERN_RE.search(datetime_string).group(3)
    except (AttributeError, IndexError):
        # AttributeError: search() found nothing and returned None.
        # IndexError: the pattern does not define a third group.
        return ''
Пример #4
0
 def _sort_dates(self, val):
     """
     Convert:
     '8 nov 2010' => '2010-11-08'
     'nov 2010' => '2010-11-01'
     '2010' => '2010-01-01'
     """
     datetext_format = "%Y-%m-%d"
     try:
         datestruct = strptime(val, datetext_format)
     except ValueError:
         try:
             datestruct = strptime(val, "%d %b %Y")
         except ValueError:
             try:
                 datestruct = strptime(val, "%b %Y")
             except ValueError:
                 try:
                     datestruct = strptime(val, "%Y")
                 except ValueError:
                     return val
     return strftime(datetext_format, datestruct)
Пример #5
0
def format_element(bfo, place_label, publisher_label, date_label,
           separator=', ', date_format=""):
    """
    Print imprint (Order: Name of publisher, place of publication and date of publication).

    The publisher part is left out when the publisher is "sine nomine",
    and the place part when the place is "sine loco". The date part is
    left out when the record holds no date.

    Parameter <code>date_format</code> allows to specify the string representation of the output.
    The format string has the same behaviour as the strftime() function::
        <pre>Eg: 1982-09-24 07:32:00
             "%d %B %Y"   -> 24 September 1982
             "%I:%M"      -> 07:32
        </pre>
    When <code>date_format</code> is empty, or when the stored date is not
    of the form YYYY-MM-DD, the date is printed as found in the record.

    @param separator: a separator between the elements of imprint
    @param place_label: a label to print before the publication place value
    @param publisher_label: a label to print before the publisher name
    @param date_label: a label to print before the publication date
    @param date_format: date format
    @see: place.py, publisher.py, date.py, reprints.py, pagination.py
    """
    imprint_place = bfo.field('260__a')
    imprint_publisher = bfo.field('260__b')
    imprint_date = bfo.field('260__c')

    fragments = []

    if imprint_publisher != "sine nomine":
        fragments.append(publisher_label + ' ' + imprint_publisher + separator)

    if imprint_place != "sine loco":
        fragments.append(place_label + ' ' + imprint_place + separator)

    if len(imprint_date) > 0:
        # Default to the raw value; only replace it when reformatting works.
        shown_date = imprint_date
        if date_format != '':
            try:
                shown_date = strftime(date_format,
                                      strptime(imprint_date, "%Y-%m-%d"))
            except ValueError:
                shown_date = imprint_date
        fragments.append(date_label + ' ' + shown_date)

    return "".join(fragments)
Пример #6
0
def format_element(bfo, date_format='%d %B %Y', source_formats='%Y-%m-%d', source_fields="260__c",
                   guess_source_format="no", ignore_date_format_for_year_only="yes"):
    """
    Prints the imprint publication date.

    Parameter <code>date_format</code> allows to specify the string
    representation of the output.

    The format string has the same behaviour as the strftime() function:
        <pre>Eg: 1982-09-24 07:32:00
            "%d %B %Y"   -> 24 September 1982
            "%I:%M"      -> 07:32
        </pre>

    Note that if input date is simply a year (4 digits), it is
    returned as such if <code>ignore_date_format_for_year_only</code>
    is set to 'yes', regardless of <code>date_format</code>.

    Parameter <code>source_formats</code> allows to specify the
    expected format of the date in the metadata. If the format does
    not match, the date cannot be parsed, and cannot be formatted
    according to <code>date_format</code>. Comma-separated values can
    be provided in order to test several input formats.

    Parameter <code>source_fields</code> defines the list of MARC
    fields where we would like to retrieve the date. First one
    matching <code>source_formats</code> is used. If none, fall back to
    first non-empty one.

    Parameter <code>guess_source_format</code> when set to 'yes'
    allows to guess the date source format.


    @see: pagination.py, publisher.py, reprints.py, imprint.py, place.py
    @param date_format: output date format.
    @param source_formats: expected (comma-separated values) input date format.
    @param source_fields: the MARC fields (comma-separated values) to look up
                   for the date. First non-empty one is used.
    @param guess_source_format: if 'yes', ignore 'source_formats' and
                                try to guess the format with guess_datetime().
    @param ignore_date_format_for_year_only: if 'yes', ignore 'date_format' when the
                                             metadata in the record contains a single
                                             year (4 digits).
    @return: the formatted date; the first non-empty raw value when no
             field could be parsed; the empty string when every field is empty.
    """
    guess_source_format_p = guess_source_format.lower() == 'yes'
    ignore_date_format_for_year_only_p = ignore_date_format_for_year_only.lower() == 'yes'
    source_marc_fields = [field.strip() for field in source_fields.split(',')]
    candidate_formats = [fmt.strip() for fmt in source_formats.split(',')]

    parsed_datetime_value = None
    first_matched_raw_date = ''
    for source_marc_field in source_marc_fields:
        date_value = bfo.field(source_marc_field)
        if not date_value:
            continue
        if not first_matched_raw_date:
            # Remembered as the fallback output if nothing can be parsed.
            first_matched_raw_date = date_value
        if ignore_date_format_for_year_only_p and \
               date_value.isdigit() and len(date_value) == 4:
            # Year. Return as such
            return date_value
        if guess_source_format_p:
            try:
                parsed_datetime_value = guess_datetime(date_value)
            except Exception:
                # NOTE(review): the exceptions raised by guess_datetime()
                # are not visible from here, so this stays broad; unlike
                # a bare except it lets KeyboardInterrupt/SystemExit through.
                pass
            else:
                break
        else:
            for source_format in candidate_formats:
                try:
                    parsed_datetime_value = strptime(date_value, source_format)
                    break
                except ValueError:
                    # This format does not match: try the next one.
                    pass
        if parsed_datetime_value:
            # We have correctly parsed one date!
            break

    if parsed_datetime_value:
        return strftime(date_format, parsed_datetime_value)
    return first_matched_raw_date