def _parse_backdating_candidate(value):
    """
    Turn a raw date string into a sortable 'YYYY-MM-DD 00:00:00' string.

    Complete dates are tried first, then year-month dates, then year-only
    dates. Missing month/day parts are filled with the placeholder 99 so
    that a partial date sorts after every complete date of the same period.

    Return None when no configured format matches the value.
    """
    tiers = (
        (CFG_POSSIBLE_DATE_FORMATS, "%Y-%m-%d 00:00:00"),
        (CFG_POSSIBLE_DATE_FORMATS_ONLY_YEAR_MONTH, "%Y-%m-99 00:00:00"),
        (CFG_POSSIBLE_DATE_FORMATS_ONLY_YEAR, "%Y-99-99 00:00:00"),
    )
    for date_formats, output_format in tiers:
        for date_format in date_formats:
            try:
                return strftime(output_format, strptime(value, date_format))
            except ValueError:
                # This format does not match; try the next one.
                continue
    return None


def check_records(records, date_fields=CFG_DEFAULT_DATE_FIELDS):
    """
    Backdate the earliest_date of a record to reflect the earliest date
    information available.

    For every record, candidate dates are gathered from the creation and
    modification dates stored in the bibrec table, from the 005 field when
    the record has one, and from every value found in date_fields that
    matches one of the configured date formats (values matching no format
    are ignored). The smallest candidate is written back to
    bibrec.earliest_date when it differs from the stored value, and a
    warning describing the change is attached to the record.

    Note in case of partial dates (e.g. just the year or just year-month),
    the missing month/day is temporarily set to 99, so if there is another
    complete date within the same year (or month) that date sorts first and
    is preferred. Any 99 left in the winning date is then replaced by 01.

    @param records: iterable of records; each must support tag lookup
        (record["001"]), the "in" operator, iterfields() and warn().
    @param date_fields: fields handed to record.iterfields() to find the
        date values to consider.
    """
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    for record in records:
        # NOTE(review): index 3 of the first 001/005 entry is used as the
        # field value -- confirm against the record class.
        recid = int(record["001"][0][3])
        creation_date, modification_date, earliest_date = run_sql("SELECT creation_date, modification_date, earliest_date FROM bibrec WHERE id=%s", (recid, ))[0]
        creation_date = strftime(timestamp_format, creation_date)
        modification_date = strftime(timestamp_format, modification_date)
        earliest_date = strftime(timestamp_format, earliest_date)

        # All candidates are strings in the same layout, so min() on the
        # strings picks the chronologically earliest one.
        dates = [creation_date, modification_date]
        if '005' in record:
            dates.append(strftime(timestamp_format, strptime(record["005"][0][3], "%Y%m%d%H%M%S.0")))
        for position, value in record.iterfields(date_fields):
            parsed_date = _parse_backdating_candidate(value)
            if parsed_date is not None:
                dates.append(parsed_date)

        ## Let's restore meaningful first month and day
        min_date = min(dates).replace("-99", "-01")
        if min_date != earliest_date:
            run_sql("UPDATE bibrec SET earliest_date=%s WHERE id=%s", (min_date, recid))
            record.warn("record earliest_date amended from %s to %s" % (earliest_date, min_date))
def calculate_citation_history_coordinates(recid):
    """Return a list of citation graph coordinates for RECID, sorted by year.

    The x coordinates (years) come from
    calculate_citation_graphe_x_coordinates(recid); each starts with a
    count of zero. For every record returned by get_cited_by(recid), the
    first four characters of its year information are parsed as a year and
    the matching counter is incremented. Years outside the x coordinates,
    records without year information and unparsable years are ignored.

    Return a list of (year, count) pairs sorted by year, or an empty list
    when there are fewer than CFG_BIBRANK_CITATION_HISTORY_MIN_X_POINTS
    x coordinates.
    """
    citations_per_year = dict.fromkeys(
        calculate_citation_graphe_x_coordinates(recid), 0)
    if len(citations_per_year) < CFG_BIBRANK_CITATION_HISTORY_MIN_X_POINTS:
        # do not generate graphs that have less than X points
        return []

    # Use a dedicated loop variable so the RECID argument is not clobbered.
    for citing_recid in get_cited_by(recid):
        rec_date = get_record_year(citing_recid)
        # Some records simply do not have these fields
        if not rec_date:
            continue
        # Maybe rec_date[0][0:4] has a typo and cannot
        # be converted to a year
        try:
            parsed = strptime(rec_date[0][:4], '%Y')
        except ValueError:
            continue
        if parsed.year in citations_per_year:
            citations_per_year[parsed.year] += 1

    # items() sorts identically to iteritems() on Python 2 and, unlike it,
    # also exists on Python 3.
    return sorted(citations_per_year.items())
def check_records(records, date_fields=CFG_DEFAULT_DATE_FIELDS):
    """
    Rewrite every date found in date_fields into a canonical form.

    Each value is matched against the complete-date formats (rewritten as
    YYYY-MM-DD), then the year-month formats (YYYY-MM), then the year-only
    formats (YYYY). For the first two groups, when position[0] is listed
    in CFG_ONLY_YEAR_FIELDS only the first four characters (the year) are
    kept. The field is amended only when the canonical form differs from
    the stored value. A value matching no format marks the record invalid.
    """
    # (accepted input formats, canonical output format, may be cut to year)
    tiers = (
        (CFG_POSSIBLE_DATE_FORMATS, "%Y-%m-%d", True),
        (CFG_POSSIBLE_DATE_FORMATS_ONLY_YEAR_MONTH, "%Y-%m", True),
        (CFG_POSSIBLE_DATE_FORMATS_ONLY_YEAR, "%Y", False),
    )
    for record in records:
        for position, value in record.iterfields(date_fields):
            recognized = False
            for accepted_formats, canonical, year_cut_allowed in tiers:
                for date_format in accepted_formats:
                    try:
                        normalized = strftime(
                            canonical, strptime(value, date_format))
                        if year_cut_allowed and position[0] in CFG_ONLY_YEAR_FIELDS:
                            normalized = normalized[:4]
                        if normalized != value:
                            record.amend_field(position, normalized)
                    except ValueError:
                        # Not this format; move on to the next candidate.
                        continue
                    recognized = True
                    break
                if recognized:
                    break
            if not recognized:
                record.set_invalid(
                    "Cannot recognize date %s in position %s" % (value, position))
def _sort_dates(self, val): """ Convert: '8 nov 2010' => '2010-11-08' 'nov 2010' => '2010-11-01' '2010' => '2010-01-01' """ datetext_format = "%Y-%m-%d" try: datestruct = strptime(val, datetext_format) except ValueError: try: datestruct = strptime(val, "%d %b %Y") except ValueError: try: datestruct = strptime(val, "%b %Y") except ValueError: try: datestruct = strptime(val, "%Y") except ValueError: return val return strftime(datetext_format, datestruct)
def _iter_date_format_candidates():
    """
    Lazily yield (input format, canonical output format, year cut allowed)
    triples: complete dates first, then year-month, then year-only.
    """
    for full_format in CFG_POSSIBLE_DATE_FORMATS:
        yield full_format, "%Y-%m-%d", True
    for year_month_format in CFG_POSSIBLE_DATE_FORMATS_ONLY_YEAR_MONTH:
        yield year_month_format, "%Y-%m", True
    for year_format in CFG_POSSIBLE_DATE_FORMATS_ONLY_YEAR:
        yield year_format, "%Y", False


def check_records(records, date_fields=CFG_DEFAULT_DATE_FIELDS):
    """
    Bring all dates stored in date_fields to a canonical representation.

    Complete dates become YYYY-MM-DD, year-month dates become YYYY-MM and
    year-only dates become YYYY. Complete and year-month dates are cut to
    their first four characters when position[0] is listed in
    CFG_ONLY_YEAR_FIELDS. A field is amended only if its canonical form
    differs from the current value; a value that no configured format can
    parse marks the record as invalid.
    """
    for record in records:
        for position, value in record.iterfields(date_fields):
            for date_format, canonical, year_cut_allowed in _iter_date_format_candidates():
                try:
                    fixed_date = strftime(canonical, strptime(value, date_format))
                    if year_cut_allowed and position[0] in CFG_ONLY_YEAR_FIELDS:
                        fixed_date = fixed_date[:4]
                    if fixed_date != value:
                        record.amend_field(position, fixed_date)
                    # First matching format wins; stop trying others.
                    break
                except ValueError:
                    pass
            else:
                # Loop ended without a break: nothing could parse the value.
                record.set_invalid("Cannot recognize date %s in position %s" % (value, position))