Пример #1
0
def check_records(records, date_fields=CFG_DEFAULT_DATE_FIELDS):
    """
    Backdate the earliest_date of each record to the earliest date found.

    The candidate dates of a record are the creation and modification dates
    stored in bibrec, the 005 field (when present) and every value of
    `date_fields` that matches one of the configured date formats. Values
    matching none of the formats are silently skipped.

    Partial dates (year-month or year only) are written with "99" in place
    of the missing month/day. As candidates are compared as strings, such a
    date sorts after any complete date of the same month or year. If it is
    still the minimum, the "99" placeholders become "01" before storing.

    When the computed date differs from the stored earliest_date, the bibrec
    row is updated and a warning is attached to the record.
    """
    # Ordered (input format, output template) pairs: complete dates first,
    # then year-month, then year-only.
    parse_attempts = (
        [(pattern, "%Y-%m-%d 00:00:00")
         for pattern in CFG_POSSIBLE_DATE_FORMATS]
        + [(pattern, "%Y-%m-99 00:00:00")
           for pattern in CFG_POSSIBLE_DATE_FORMATS_ONLY_YEAR_MONTH]
        + [(pattern, "%Y-99-99 00:00:00")
           for pattern in CFG_POSSIBLE_DATE_FORMATS_ONLY_YEAR]
    )
    timestamp_format = "%Y-%m-%d %H:%M:%S"

    for record in records:
        recid = int(record["001"][0][3])

        row = run_sql("SELECT creation_date, modification_date, earliest_date FROM bibrec WHERE id=%s", (recid, ))[0]
        created, modified, stored_earliest = row
        candidates = [
            strftime(timestamp_format, created),
            strftime(timestamp_format, modified),
        ]
        stored_earliest = strftime(timestamp_format, stored_earliest)

        if '005' in record:
            last_transaction = strptime(record["005"][0][3], "%Y%m%d%H%M%S.0")
            candidates.append(strftime(timestamp_format, last_transaction))

        for _position, raw_value in record.iterfields(date_fields):
            # The first pair that parses wins; later pairs are not tried.
            for pattern, template in parse_attempts:
                try:
                    candidates.append(
                        strftime(template, strptime(raw_value, pattern)))
                except ValueError:
                    continue
                break

        # Let's restore meaningful first month and day
        earliest_found = min(candidates).replace("-99", "-01")
        if earliest_found != stored_earliest:
            run_sql("UPDATE bibrec SET earliest_date=%s WHERE id=%s", (earliest_found, recid))
            record.warn("record earliest_date amended from %s to %s" % (stored_earliest, earliest_found))
Пример #2
0
def calculate_citation_history_coordinates(recid):
    """Return a list of citation graph coordinates for RECID, sorted by year.

    The years come from calculate_citation_graphe_x_coordinates(recid). Each
    year is paired with the number of records returned by
    get_cited_by(recid) whose get_record_year() value parses to that year.
    Records without a year, with an unparsable year, or with a year outside
    the x coordinates are not counted.

    An empty list is returned when there are fewer than
    CFG_BIBRANK_CITATION_HISTORY_MIN_X_POINTS distinct years.
    """
    result = dict.fromkeys(calculate_citation_graphe_x_coordinates(recid), 0)

    if len(result) < CFG_BIBRANK_CITATION_HISTORY_MIN_X_POINTS:
        # do not generate graphs that have less than X points
        return []

    # A dedicated loop name keeps the RECID argument from being rebound.
    for citing_recid in get_cited_by(recid):
        rec_date = get_record_year(citing_recid)
        # Some records simply do not have these fields
        if not rec_date:
            continue
        try:
            d = strptime(rec_date[0][:4], '%Y')
        except ValueError:
            # The first four characters are not a valid year (e.g. a typo)
            continue
        if d.year in result:
            result[d.year] += 1

    # items() yields the same pairs as iteritems() and also exists on Python 3
    return sorted(result.items())
def calculate_citation_history_coordinates(recid):
    """Return a list of citation graph coordinates for RECID, sorted by year.

    The years come from calculate_citation_graphe_x_coordinates(recid). Each
    year is paired with the number of records returned by
    get_cited_by(recid) whose get_record_year() value parses to that year.
    Records without a year, with an unparsable year, or with a year outside
    the x coordinates are not counted.

    An empty list is returned when there are fewer than
    CFG_BIBRANK_CITATION_HISTORY_MIN_X_POINTS distinct years.
    """
    result = dict.fromkeys(calculate_citation_graphe_x_coordinates(recid), 0)

    if len(result) < CFG_BIBRANK_CITATION_HISTORY_MIN_X_POINTS:
        # do not generate graphs that have less than X points
        return []

    # A dedicated loop name keeps the RECID argument from being rebound.
    for citing_recid in get_cited_by(recid):
        rec_date = get_record_year(citing_recid)
        # Some records simply do not have these fields
        if not rec_date:
            continue
        try:
            d = strptime(rec_date[0][:4], '%Y')
        except ValueError:
            # The first four characters are not a valid year (e.g. a typo)
            continue
        if d.year in result:
            result[d.year] += 1

    # items() yields the same pairs as iteritems() and also exists on Python 3
    return sorted(result.items())
Пример #4
0
def check_records(records, date_fields=CFG_DEFAULT_DATE_FIELDS):
    """Rewrite the date values of `date_fields` in a normalised form.

    Every value is tried against the complete-date formats, then the
    year-month formats, then the year-only formats. The first format that
    parses decides the output ("%Y-%m-%d", "%Y-%m" or "%Y"). For the first
    two groups, a field whose position[0] is listed in CFG_ONLY_YEAR_FIELDS
    keeps only the first four characters. The field is amended only when
    the normalised text differs from the current value.

    A value that no format recognises marks the record as invalid.
    """
    # (accepted input formats, output template, may be cut down to the year)
    tiers = (
        (CFG_POSSIBLE_DATE_FORMATS, "%Y-%m-%d", True),
        (CFG_POSSIBLE_DATE_FORMATS_ONLY_YEAR_MONTH, "%Y-%m", True),
        (CFG_POSSIBLE_DATE_FORMATS_ONLY_YEAR, "%Y", False),
    )
    for record in records:
        for position, value in record.iterfields(date_fields):
            for accepted_formats, template, truncate_to_year in tiers:
                recognised = False
                for date_format in accepted_formats:
                    # A ValueError raised anywhere in this attempt moves on
                    # to the next format.
                    try:
                        normalised = strftime(
                            template, strptime(value, date_format))
                        if (truncate_to_year
                                and position[0] in CFG_ONLY_YEAR_FIELDS):
                            normalised = normalised[:4]
                        if normalised != value:
                            record.amend_field(position, normalised)
                    except ValueError:
                        continue
                    recognised = True
                    break
                if recognised:
                    break
            else:
                # No tier produced a match for this value.
                record.set_invalid(
                    "Cannot recognize date %s in position %s" %
                    (value, position))
Пример #5
0
 def _sort_dates(self, val):
     """
     Convert:
     '8 nov 2010' => '2010-11-08'
     'nov 2010' => '2010-11-01'
     '2010' => '2010-01-01'
     """
     datetext_format = "%Y-%m-%d"
     try:
         datestruct = strptime(val, datetext_format)
     except ValueError:
         try:
             datestruct = strptime(val, "%d %b %Y")
         except ValueError:
             try:
                 datestruct = strptime(val, "%b %Y")
             except ValueError:
                 try:
                     datestruct = strptime(val, "%Y")
                 except ValueError:
                     return val
     return strftime(datetext_format, datestruct)
Пример #6
0
def check_records(records, date_fields=CFG_DEFAULT_DATE_FIELDS):
    """Rewrite the date values of `date_fields` in a normalised form.

    Each value is matched against the complete-date formats first, then the
    year-month formats, then the year-only formats. The first match decides
    the output layout ("%Y-%m-%d", "%Y-%m" or "%Y"). For complete and
    year-month matches, a field whose position[0] is in CFG_ONLY_YEAR_FIELDS
    is reduced to its first four characters. The field is amended only if
    the result differs from the current value.

    A value that matches no format marks the record as invalid.
    """
    # One flat, ordered list of (input format, output template, year-cut flag)
    attempts = (
        [(fmt, "%Y-%m-%d", True) for fmt in CFG_POSSIBLE_DATE_FORMATS]
        + [(fmt, "%Y-%m", True)
           for fmt in CFG_POSSIBLE_DATE_FORMATS_ONLY_YEAR_MONTH]
        + [(fmt, "%Y", False) for fmt in CFG_POSSIBLE_DATE_FORMATS_ONLY_YEAR]
    )
    for record in records:
        for position, value in record.iterfields(date_fields):
            for date_format, template, cut_to_year in attempts:
                # A ValueError raised anywhere in this attempt moves on to
                # the next format.
                try:
                    cleaned = strftime(template, strptime(value, date_format))
                    if cut_to_year and position[0] in CFG_ONLY_YEAR_FIELDS:
                        cleaned = cleaned[:4]
                    if cleaned != value:
                        record.amend_field(position, cleaned)
                except ValueError:
                    continue
                break
            else:
                # Every attempt failed for this value.
                record.set_invalid("Cannot recognize date %s in position %s" % (value, position))