def dump_configuration():
    """Creates a dump of the data existing in the bibsort tables.

    Reads every (id, name, definition, washer) row of the bsrMETHOD table
    and writes one ``sort_field_<id>`` section per row into a timestamped
    ``bibsort_db_dump_<ddmmYYYYHHMMSS>.cfg`` file under
    ``CFG_ETCDIR/bibsort``.

    @return: True when the dump was written or the table was empty,
        False when reading the table or writing the file raised Error.
    """
    try:
        results = run_sql("""SELECT id, name, definition, washer FROM "bsrMETHOD" """)
    except Error as err:
        # Adjacent literals instead of an in-string backslash continuation,
        # so the source indentation does not leak into the logged message.
        write_message("The error: [%s] occured while trying to get "
                      "the bibsort data from the database." % err, sys.stderr)
        return False
    write_message('The bibsort data has been read from the database.', verbose=5)

    if not results:
        write_message("The bsrMETHOD table does not contain any data.")
        return True

    config = ConfigParser.ConfigParser()
    for item in results:
        section = "sort_field_%s" % item[0]
        config.add_section(section)
        config.set(section, "name", item[1])
        config.set(section, "definition", item[2])
        config.set(section, "washer", item[3])

    output_file_name = CFG_ETCDIR + '/bibsort/bibsort_db_dump_%s.cfg' % \
        strftime("%d%m%Y%H%M%S", time.localtime())
    write_message('Opening the output file %s' % output_file_name)
    try:
        # The context manager closes the handle even when config.write()
        # raises; the previous open()/close() pair leaked it in that case.
        with open(output_file_name, 'w') as output_file:
            config.write(output_file)
    except Error as err:
        # NOTE(review): only `Error` is caught here, as before; whether that
        # name covers IOError/OSError from open() is not visible - confirm.
        write_message('Can not operate on the configuration file %s [%s].' \
                      % (output_file_name, err), stream=sys.stderr)
        return False
    write_message('Configuration data dumped to file.')
    return True
def add_person_comment(person_id, message):
    '''
    Store a comment for a person, prefixed with the current GMT date and time.

    The comment is escaped (quotes included) and saved through
    dbapi.set_person_data() under the "comment" tag in the form
    "<YYYY-MM-DD HH:MM:SS>;;;<escaped message>".

    @param person_id: person id to assign the comment to
    @type person_id: int
    @param message: defines the comment to set
    @type message: string

    @return: the stored message incl. the metadata, or False when
        person_id / message cannot be converted to int / str
    @rtype: string or boolean
    '''
    try:
        text = str(message)
        person = int(person_id)
    except (ValueError, TypeError):
        return False

    stamp = strftime("%Y-%m-%d %H:%M:%S", gmtime())
    stored = "%s;;;%s" % (stamp, escape(text, quote=True))
    dbapi.set_person_data(person, "comment", stored)
    return stored
def dump_configuration():
    """Creates a dump of the data existing in the bibsort tables.

    Reads every (id, name, definition, washer) row of the bsrMETHOD table
    and writes one ``sort_field_<id>`` section per row into a timestamped
    ``bibsort_db_dump_<ddmmYYYYHHMMSS>.cfg`` file under
    ``CFG_ETCDIR/bibsort``.

    @return: True when the dump was written or the table was empty,
        False when reading the table or writing the file raised Error.
    """
    try:
        results = run_sql("SELECT id, name, definition, washer FROM bsrMETHOD")
    except Error as err:
        # Adjacent literals instead of an in-string backslash continuation,
        # so the source indentation does not leak into the logged message.
        write_message(
            "The error: [%s] occured while trying to get "
            "the bibsort data from the database." % err, sys.stderr)
        return False
    write_message('The bibsort data has been read from the database.',
                  verbose=5)

    if not results:
        write_message("The bsrMETHOD table does not contain any data.")
        return True

    config = ConfigParser.ConfigParser()
    for item in results:
        section = "sort_field_%s" % item[0]
        config.add_section(section)
        config.set(section, "name", item[1])
        config.set(section, "definition", item[2])
        config.set(section, "washer", item[3])

    output_file_name = CFG_ETCDIR + '/bibsort/bibsort_db_dump_%s.cfg' % \
        strftime("%d%m%Y%H%M%S", time.localtime())
    write_message('Opening the output file %s' % output_file_name)
    try:
        # The context manager closes the handle even when config.write()
        # raises; the previous open()/close() pair leaked it in that case.
        with open(output_file_name, 'w') as output_file:
            config.write(output_file)
    except Error as err:
        # NOTE(review): only `Error` is caught here, as before; whether that
        # name covers IOError/OSError from open() is not visible - confirm.
        write_message('Can not operate on the configuration file %s [%s].' \
                      % (output_file_name, err), stream=sys.stderr)
        return False
    write_message('Configuration data dumped to file.')
    return True
def write_to_methoddata_table(id_method, data_dict, data_dict_ordered, data_list_sorted, update_timestamp=True):
    """Serialize the data and write it to the bsrMETHODDATA table.

    The three data structures are serialized with serialize_via_marshal();
    the existing row for the method is deleted and a new one inserted.

    @param id_method: value of the id_bsrMETHOD column identifying the row
    @param data_dict: stored in the data_dict column after serialization
    @param data_dict_ordered: stored in the data_dict_ordered column
    @param data_list_sorted: stored in the data_list_sorted column
    @param update_timestamp: when False, the last_updated value of the
        existing row is reused; if there is no such row the current local
        time is used, as it is when the flag is True
    @return: True on success, False when the delete/insert raised Error
    """
    write_message('Starting serializing the data..', verbose=5)
    serialized_data_dict = serialize_via_marshal(data_dict)
    serialized_data_dict_ordered = serialize_via_marshal(data_dict_ordered)
    serialized_data_list_sorted = serialize_via_marshal(data_list_sorted)
    write_message('Serialization completed.', verbose=5)
    date = strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    if not update_timestamp:
        try:
            date = run_sql('SELECT last_updated from "bsrMETHODDATA" WHERE "id_bsrMETHOD" = %s', (id_method, ))[0][0]
        except IndexError:
            pass # keep the generated date
    write_message("Starting writing the data for method_id=%s " \
                  "to the database (table bsrMETHODDATA)" % id_method, verbose=5)
    try:
        write_message('Deleting old data..', verbose=5)
        run_sql("""DELETE FROM "bsrMETHODDATA" WHERE "id_bsrMETHOD" = %s""", (id_method, ))
        write_message('Inserting new data..', verbose=5)
        run_sql("""INSERT into "bsrMETHODDATA" ("id_bsrMETHOD", data_dict, data_dict_ordered, data_list_sorted, last_updated) VALUES (%s, %s, %s, %s, %s)""", \
                (id_method, serialized_data_dict, serialized_data_dict_ordered, \
                 serialized_data_list_sorted, date, ))
    except Error as err:
        # The message used to name a non-existent "bsrMETHODATA" table.
        write_message("The error [%s] occured when inserting new bibsort data "\
                      "into bsrMETHODDATA table" % err, sys.stderr)
        return False
    write_message('Writing to the bsrMETHODDATA successfully completed.', \
                  verbose=5)
    return True
def write_to_buckets_table(id_method, bucket_no, bucket_data, bucket_last_value, update_timestamp=True):
    """Serialize the data and write it to the bsrMETHODDATABUCKET table.

    The existing row for (id_method, bucket_no) is deleted and a new one
    holding ``bucket_data.fastdump()`` is inserted.

    @param id_method: value of the id_bsrMETHOD column
    @param bucket_no: number of the bucket being written
    @param bucket_data: object exposing fastdump(); its dump is stored in
        the bucket_data column
    @param bucket_last_value: stored as-is in the bucket_last_value column
    @param update_timestamp: when False, the last_updated value of the
        existing row is reused; if there is no such row the current local
        time is used, as it is when the flag is True
    @return: True on success, False when the delete/insert raised Error
    """
    write_message('Writing the data for bucket number %s for ' \
                  'method_id=%s to the database' \
                  % (bucket_no, id_method), verbose=5)
    write_message('Serializing data for bucket number %s' % bucket_no, verbose=5)
    serialized_bucket_data = bucket_data.fastdump()
    date = strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    if not update_timestamp:
        try:
            date = run_sql('SELECT last_updated from "bsrMETHODDATABUCKET" WHERE "id_bsrMETHOD" = %s and bucket_no = %s', \
                           (id_method, bucket_no))[0][0]
        except IndexError:
            pass # keep the generated date
    try:
        write_message('Deleting old data.', verbose=5)
        run_sql("""DELETE FROM "bsrMETHODDATABUCKET" WHERE "id_bsrMETHOD" = %s AND bucket_no = %s""", \
                (id_method, bucket_no, ))
        write_message('Inserting new data.', verbose=5)
        run_sql("""INSERT into "bsrMETHODDATABUCKET" ("id_bsrMETHOD", bucket_no, bucket_data, bucket_last_value, last_updated) VALUES (%s, %s, %s, %s, %s)""", \
                (id_method, bucket_no, serialized_bucket_data, bucket_last_value, date, ))
    except Error as err:
        # The message used to name a non-existent "bsrMETHODATA_BUCKETS"
        # table; the statements above operate on bsrMETHODDATABUCKET.
        write_message("The error [%s] occured when inserting new bibsort data " \
                      "into bsrMETHODDATABUCKET table" % err, sys.stderr)
        return False
    write_message('Writing to bsrMETHODDATABUCKET for ' \
                  'bucket number %s completed.' % bucket_no, verbose=5)
    return True
def parse_date_for_googlescholar(datetime_string):
    """
    Parse (guess) and return the date in a format adequate for Google
    Scholar.

    We don't use dateutils.guess_datetime() as this one might lead to
    results not accurate enough.

    The string is first normalized with CFG_PUNCTUATION_PATTERN_RE and
    CFG_SPACES_PATTERN_RE (each match replaced by one space). It is then
    tried against every format of CFG_POSSIBLE_DATE_FORMATS, first as-is
    and then with month names translated via CFG_MONTH_NAMES_MAPPING.

    @param datetime_string: the raw date text to interpret
    @return: the date as 'YYYY/MM/DD' when a format matches; otherwise
        group 3 of the first CFG_YEAR_PATTERN_RE match; otherwise ''
    """
    datetime_string = CFG_PUNCTUATION_PATTERN_RE.sub(' ', datetime_string)
    datetime_string = CFG_SPACES_PATTERN_RE.sub(' ', datetime_string)

    def replace_month(match_obj):
        "Return translated month in the matching object"
        month = match_obj.group(2).strip()
        return match_obj.group(1) + \
               CFG_MONTH_NAMES_MAPPING.get(month.upper(), month) + \
               match_obj.group(3)

    def parse_with_known_formats(candidate):
        "Return the first successful strptime() result, or None"
        candidate = candidate.strip()
        for dateformat in CFG_POSSIBLE_DATE_FORMATS:
            try:
                return strptime(candidate, dateformat)
            except (ValueError, TypeError):
                # Format does not match: try the next one. A bare except
                # here would also trap KeyboardInterrupt/SystemExit.
                continue
        return None

    parsed_datetime = parse_with_known_formats(datetime_string)
    if not parsed_datetime:
        # Do it all again, with the translated version of the string
        translated_datetime_string = CFG_MONTHS_I18N_PATTERN_RE.sub(
            replace_month, datetime_string)
        parsed_datetime = parse_with_known_formats(translated_datetime_string)

    if parsed_datetime:
        return strftime('%Y/%m/%d', parsed_datetime)

    # Look for a year inside the string:
    try:
        return CFG_YEAR_PATTERN_RE.search(datetime_string).group(3)
    except (AttributeError, IndexError):
        # AttributeError: search() found nothing and returned None.
        # IndexError: the pattern has no third group.
        return ''
def parse_date_for_googlescholar(datetime_string):
    """
    Parse (guess) and return the date in a format adequate for Google
    Scholar.

    We don't use dateutils.guess_datetime() as this one might lead to
    results not accurate enough.

    The string is first normalized with CFG_PUNCTUATION_PATTERN_RE and
    CFG_SPACES_PATTERN_RE (each match replaced by one space). It is then
    tried against every format of CFG_POSSIBLE_DATE_FORMATS, first as-is
    and then with month names translated via CFG_MONTH_NAMES_MAPPING.

    @param datetime_string: the raw date text to interpret
    @return: the date as 'YYYY/MM/DD' when a format matches; otherwise
        group 3 of the first CFG_YEAR_PATTERN_RE match; otherwise ''
    """
    datetime_string = CFG_PUNCTUATION_PATTERN_RE.sub(' ', datetime_string)
    datetime_string = CFG_SPACES_PATTERN_RE.sub(' ', datetime_string)

    def replace_month(match_obj):
        "Return translated month in the matching object"
        month = match_obj.group(2).strip()
        return match_obj.group(1) + \
               CFG_MONTH_NAMES_MAPPING.get(month.upper(), month) + \
               match_obj.group(3)

    def first_matching_format(text):
        "Return strptime() output for the first format that fits, else None"
        text = text.strip()
        for dateformat in CFG_POSSIBLE_DATE_FORMATS:
            try:
                return strptime(text, dateformat)
            except (ValueError, TypeError):
                # Format does not match: try the next one. A bare except
                # here would also trap KeyboardInterrupt/SystemExit.
                continue
        return None

    parsed_datetime = first_matching_format(datetime_string)
    if not parsed_datetime:
        # Do it all again, with the translated version of the string
        translated_datetime_string = CFG_MONTHS_I18N_PATTERN_RE.sub(
            replace_month, datetime_string)
        parsed_datetime = first_matching_format(translated_datetime_string)

    if parsed_datetime:
        return strftime('%Y/%m/%d', parsed_datetime)

    # Look for a year inside the string:
    try:
        return CFG_YEAR_PATTERN_RE.search(datetime_string).group(3)
    except (AttributeError, IndexError):
        # AttributeError: search() found nothing and returned None.
        # IndexError: the pattern has no third group.
        return ''
def format_element(bfo, place_label, publisher_label, date_label, separator=', ', date_format=""):
    """
    Print imprint (Order: Name of publisher, place of publication and date
    of publication).

    The publisher is skipped when it equals "sine nomine" and the place
    when it equals "sine loco"; the date is skipped when the field is empty.

    Parameter <code>date_format</code> allows to specify the string
    representation of the output. The format string has the same behaviour
    as the strftime() function::
        <pre>Eg: 1982-09-24 07:32:00
             "%d %B %Y"   -> 24 September 1982
             "%I:%M"      -> 07:32
        </pre>
    The stored date is expected as YYYY-MM-DD; when it does not parse, it
    is printed unchanged.

    @param separator: a separator between the elements of imprint
    @param place_label: a label to print before the publication place value
    @param publisher_label: a label to print before the publisher name
    @param date_label: a label to print before the publication date
    @param date_format: date format
    @see: place.py, publisher.py, date.py, reprints.py, pagination.py
    """
    place = bfo.field('260__a')
    publisher = bfo.field('260__b')
    date = bfo.field('260__c')

    pieces = []
    if publisher != "sine nomine":
        pieces.append(publisher_label + ' ' + publisher + separator)
    if place != "sine loco":
        pieces.append(place_label + ' ' + place + separator)

    if len(date) > 0:
        shown_date = date
        if date_format != '':
            try:
                shown_date = strftime(date_format, strptime(date, "%Y-%m-%d"))
            except ValueError:
                # not a YYYY-MM-DD value: fall back to the raw field content
                shown_date = date
        pieces.append(date_label + ' ' + shown_date)

    return "".join(pieces)
def get_preformatted_record_date(recID, of):
    """
    Return when the cached preformatted record in bibfmt was last updated.

    Looks up the single Bibfmt row matching the record id and the output
    format and formats its ``last_updated`` value.

    :param recID: the id of the record to fetch
    :param of: the output format code
    :return: the date of the last update of the cache formatted as
        'YYYY-MM-DD HH:MM:SS', or None when the lookup raises
        SQLAlchemyError
    """
    try:
        cached_row = (
            Bibfmt.query
            .filter(Bibfmt.id_bibrec == recID)
            .filter(Bibfmt.format == of)
            .one()
        )
        return strftime("%Y-%m-%d %H:%M:%S", cached_row.last_updated)
    except SQLAlchemyError:
        return None
def _sort_dates(self, val):
    """
    Normalize a date string to 'YYYY-MM-DD'.

    Convert:
    '8 nov 2010' => '2010-11-08'
    'nov 2010' => '2010-11-01'
    '2010' => '2010-01-01'

    A value that is already 'YYYY-MM-DD' is re-emitted in that form; a
    value matching none of the known layouts is returned unchanged.
    """
    datetext_format = "%Y-%m-%d"
    # Tried in this order; the first layout that parses wins.
    for candidate_format in (datetext_format, "%d %b %Y", "%b %Y", "%Y"):
        try:
            datestruct = strptime(val, candidate_format)
        except ValueError:
            continue
        return strftime(datetext_format, datestruct)
    return val
def write_to_methoddata_table(id_method, data_dict, data_dict_ordered, data_list_sorted, update_timestamp=True):
    """Serialize the data and write it to the bsrMETHODDATA table.

    The three data structures are serialized with serialize_via_marshal();
    the existing row for the method is deleted and a new one inserted.

    @param id_method: value of the id_bsrMETHOD column identifying the row
    @param data_dict: stored in the data_dict column after serialization
    @param data_dict_ordered: stored in the data_dict_ordered column
    @param data_list_sorted: stored in the data_list_sorted column
    @param update_timestamp: when False, the last_updated value of the
        existing row is reused; if there is no such row the current local
        time is used, as it is when the flag is True
    @return: True on success, False when the delete/insert raised Error
    """
    write_message('Starting serializing the data..', verbose=5)
    serialized_data_dict = serialize_via_marshal(data_dict)
    serialized_data_dict_ordered = serialize_via_marshal(data_dict_ordered)
    serialized_data_list_sorted = serialize_via_marshal(data_list_sorted)
    write_message('Serialization completed.', verbose=5)
    date = strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    if not update_timestamp:
        try:
            date = run_sql(
                'SELECT last_updated from bsrMETHODDATA WHERE id_bsrMETHOD = %s',
                (id_method, ))[0][0]
        except IndexError:
            pass  # keep the generated date
    write_message("Starting writing the data for method_id=%s " \
                  "to the database (table bsrMETHODDATA)" % id_method, verbose=5)
    try:
        write_message('Deleting old data..', verbose=5)
        run_sql("DELETE FROM bsrMETHODDATA WHERE id_bsrMETHOD = %s",
                (id_method, ))
        write_message('Inserting new data..', verbose=5)
        run_sql("INSERT into bsrMETHODDATA \
            (id_bsrMETHOD, data_dict, data_dict_ordered, data_list_sorted, last_updated) \
            VALUES (%s, %s, %s, %s, %s)", \
            (id_method, serialized_data_dict, serialized_data_dict_ordered, \
             serialized_data_list_sorted, date, ))
    except Error as err:
        # The message used to name a non-existent "bsrMETHODATA" table.
        write_message("The error [%s] occured when inserting new bibsort data "\
                      "into bsrMETHODDATA table" % err, sys.stderr)
        return False
    write_message('Writing to the bsrMETHODDATA successfully completed.', \
                  verbose=5)
    return True
def write_to_buckets_table(id_method, bucket_no, bucket_data, bucket_last_value, update_timestamp=True):
    """Serialize the data and write it to the bsrMETHODDATABUCKET table.

    The existing row for (id_method, bucket_no) is deleted and a new one
    holding ``bucket_data.fastdump()`` is inserted.

    @param id_method: value of the id_bsrMETHOD column
    @param bucket_no: number of the bucket being written
    @param bucket_data: object exposing fastdump(); its dump is stored in
        the bucket_data column
    @param bucket_last_value: stored as-is in the bucket_last_value column
    @param update_timestamp: when False, the last_updated value of the
        existing row is reused; if there is no such row the current local
        time is used, as it is when the flag is True
    @return: True on success, False when the delete/insert raised Error
    """
    write_message('Writing the data for bucket number %s for ' \
                  'method_id=%s to the database' \
                  % (bucket_no, id_method), verbose=5)
    write_message('Serializing data for bucket number %s' % bucket_no,
                  verbose=5)
    serialized_bucket_data = bucket_data.fastdump()
    date = strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    if not update_timestamp:
        try:
            date = run_sql('SELECT last_updated from bsrMETHODDATABUCKET WHERE id_bsrMETHOD = %s and bucket_no = %s', \
                           (id_method, bucket_no))[0][0]
        except IndexError:
            pass  # keep the generated date
    try:
        write_message('Deleting old data.', verbose=5)
        run_sql("DELETE FROM bsrMETHODDATABUCKET \
                WHERE id_bsrMETHOD = %s AND bucket_no = %s", \
                (id_method, bucket_no, ))
        write_message('Inserting new data.', verbose=5)
        run_sql("INSERT into bsrMETHODDATABUCKET \
            (id_bsrMETHOD, bucket_no, bucket_data, bucket_last_value, last_updated) \
            VALUES (%s, %s, %s, %s, %s)", \
            (id_method, bucket_no, serialized_bucket_data, bucket_last_value, date, ))
    except Error as err:
        # The message used to name a non-existent "bsrMETHODATA_BUCKETS"
        # table; the statements above operate on bsrMETHODDATABUCKET.
        write_message("The error [%s] occured when inserting new bibsort data " \
                      "into bsrMETHODDATABUCKET table" % err, sys.stderr)
        return False
    write_message('Writing to bsrMETHODDATABUCKET for ' \
                  'bucket number %s completed.' % bucket_no, verbose=5)
    return True
def test_strftime_date_under_1900(self):
    """dateutils.strftime() formats a parsed date from the year 1765."""
    output_format = "%a, %d %b %Y %H:%M:%S +0000"
    parsed = strptime("3.1.1765", "%d.%m.%Y")
    self.assertEqual("Thu, 03 Jan 1765 00:00:00 +0000",
                     dateutils.strftime(output_format, parsed))
def test_strftime_date_over_1900(self):
    """dateutils.strftime() formats a parsed date from the year 1908."""
    output_format = "%a, %d %b %Y %H:%M:%S +0000"
    parsed = strptime("12.03.1908", "%d.%m.%Y")
    self.assertEqual("Thu, 12 Mar 1908 00:00:00 +0000",
                     dateutils.strftime(output_format, parsed))
def perform_update_buckets(recids_current_ordered, recids_to_insert, recids_old_ordered, method_id, update_timestamp = True):
    """Updates the buckets of a sorting method after records changed.

    Each bucket row of bsrMETHODDATABUCKET carries a bucket_last_value; a
    record is placed in the first bucket (by ascending bucket number)
    whose last value is not smaller than the record's weight.

    @param recids_current_ordered: mapping recid -> weight after the
        modification; indexed for every recid of the two arguments below
    @param recids_to_insert: iterable of recids to add to a bucket
    @param recids_old_ordered: mapping recid -> weight before the
        modification; its keys are the records that may move between
        buckets
    @param method_id: value of the id_bsrMETHOD column
    @param update_timestamp: when true, last_updated of every rewritten
        bucket is set to the current local time
    @raise Exception: when the method has no bucket rows
    """
    bucket_insert = {}
    bucket_delete = {}
    write_message("Updating the buckets for method_id = %s" % method_id, verbose=5)
    buckets = run_sql("""SELECT bucket_no, bucket_last_value FROM "bsrMETHODDATABUCKET" WHERE "id_bsrMETHOD" = %s""", (method_id, ))
    if not buckets:
        write_message("No bucket data found for method_id %s." \
                      % method_id, sys.stderr)
        raise Exception("No bucket data found for method_id %s." % method_id)
    buckets_dict = dict(buckets)
    # sort the buckets to be sure we are iterating them in order (1 to max):
    # dict() alone gives no such guarantee (the SELECT has no ORDER BY), and
    # the "first bucket that fits" logic below depends on ascending order.
    bucket_numbers = sorted(buckets_dict)
    for recid in recids_to_insert:
        for bucket_no in bucket_numbers:
            if recids_current_ordered[recid] <= buckets_dict[bucket_no]:
                bucket_insert.setdefault(bucket_no, []).append(recid)
                break

    for recid in recids_old_ordered:
        record_inserted = 0
        record_deleted = 0
        for bucket_no in bucket_numbers:
            bucket_value = int(buckets_dict[bucket_no])
            if record_inserted and record_deleted:
                #both insertion and deletion have been registered
                break
            if recids_current_ordered[recid] <= bucket_value and \
               recids_old_ordered[recid] <= bucket_value and \
               not record_inserted and \
               not record_deleted:
                #both before and after the modif,
                #recid should be in the same bucket -> nothing to do
                break
            if recids_current_ordered[recid] <= bucket_value and not record_inserted:
                #recid should be, after the modif, here, so insert
                bucket_insert.setdefault(bucket_no, []).append(recid)
                record_inserted = 1
            if recids_old_ordered[recid] <= bucket_value and not record_deleted:
                #recid was here before modif, must be removed
                bucket_delete.setdefault(bucket_no, []).append(recid)
                record_deleted = 1

    for bucket_no in bucket_numbers:
        if (bucket_no in bucket_insert) or (bucket_no in bucket_delete):
            res = run_sql("""SELECT bucket_data FROM "bsrMETHODDATABUCKET" where "id_bsrMETHOD" = %s AND bucket_no = %s""", \
                          (method_id, bucket_no, ))
            bucket_data = intbitset(res[0][0])
            for recid in bucket_insert.get(bucket_no, []):
                bucket_data.add(recid)
            for recid in bucket_delete.get(bucket_no, []):
                if recid in bucket_data:
                    bucket_data.remove(recid)
            if update_timestamp:
                date = strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                run_sql("""UPDATE "bsrMETHODDATABUCKET" SET bucket_data = %s, last_updated = %s WHERE "id_bsrMETHOD" = %s AND bucket_no = %s""", \
                        (bucket_data.fastdump(), date, method_id, bucket_no, ))
            else:
                run_sql("""UPDATE "bsrMETHODDATABUCKET" SET bucket_data = %s WHERE "id_bsrMETHOD" = %s AND bucket_no = %s""", \
                        (bucket_data.fastdump(), method_id, bucket_no, ))
            write_message("Updating bucket %s for method %s." % (bucket_no, method_id), verbose=5)
def perform_update_buckets(recids_current_ordered, recids_to_insert, recids_old_ordered, method_id, update_timestamp=True):
    """Updates the buckets of a sorting method after records changed.

    Each bucket row of bsrMETHODDATABUCKET carries a bucket_last_value; a
    record is placed in the first bucket (by ascending bucket number)
    whose last value is not smaller than the record's weight.

    @param recids_current_ordered: mapping recid -> weight after the
        modification; indexed for every recid of the two arguments below
    @param recids_to_insert: iterable of recids to add to a bucket
    @param recids_old_ordered: mapping recid -> weight before the
        modification; its keys are the records that may move between
        buckets
    @param method_id: value of the id_bsrMETHOD column
    @param update_timestamp: when true, last_updated of every rewritten
        bucket is set to the current local time
    @raise Exception: when the method has no bucket rows
    """
    bucket_insert = {}
    bucket_delete = {}
    write_message("Updating the buckets for method_id = %s" % method_id,
                  verbose=5)
    buckets = run_sql(
        "SELECT bucket_no, bucket_last_value \
            FROM bsrMETHODDATABUCKET \
            WHERE id_bsrMETHOD = %s", (method_id, ))
    if not buckets:
        write_message("No bucket data found for method_id %s." \
                      % method_id, sys.stderr)
        raise Exception("No bucket data found for method_id %s." % method_id)
    buckets_dict = dict(buckets)
    # sort the buckets to be sure we are iterating them in order (1 to max):
    # dict() alone gives no such guarantee (the SELECT has no ORDER BY), and
    # the "first bucket that fits" logic below depends on ascending order.
    bucket_numbers = sorted(buckets_dict)
    for recid in recids_to_insert:
        for bucket_no in bucket_numbers:
            if recids_current_ordered[recid] <= buckets_dict[bucket_no]:
                bucket_insert.setdefault(bucket_no, []).append(recid)
                break

    for recid in recids_old_ordered:
        record_inserted = 0
        record_deleted = 0
        for bucket_no in bucket_numbers:
            bucket_value = int(buckets_dict[bucket_no])
            if record_inserted and record_deleted:
                #both insertion and deletion have been registered
                break
            if recids_current_ordered[recid] <= bucket_value and \
               recids_old_ordered[recid] <= bucket_value and \
               not record_inserted and \
               not record_deleted:
                #both before and after the modif,
                #recid should be in the same bucket -> nothing to do
                break
            if recids_current_ordered[
                    recid] <= bucket_value and not record_inserted:
                #recid should be, after the modif, here, so insert
                bucket_insert.setdefault(bucket_no, []).append(recid)
                record_inserted = 1
            if recids_old_ordered[recid] <= bucket_value and not record_deleted:
                #recid was here before modif, must be removed
                bucket_delete.setdefault(bucket_no, []).append(recid)
                record_deleted = 1

    for bucket_no in bucket_numbers:
        if (bucket_no in bucket_insert) or (bucket_no in bucket_delete):
            res = run_sql("SELECT bucket_data FROM bsrMETHODDATABUCKET \
                          where id_bsrMETHOD = %s AND bucket_no = %s", \
                          (method_id, bucket_no, ))
            bucket_data = intbitset(res[0][0])
            for recid in bucket_insert.get(bucket_no, []):
                bucket_data.add(recid)
            for recid in bucket_delete.get(bucket_no, []):
                if recid in bucket_data:
                    bucket_data.remove(recid)
            if update_timestamp:
                date = strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                run_sql("UPDATE bsrMETHODDATABUCKET \
                    SET bucket_data = %s, last_updated = %s \
                    WHERE id_bsrMETHOD = %s AND bucket_no = %s", \
                    (bucket_data.fastdump(), date, method_id, bucket_no, ))
            else:
                run_sql("UPDATE bsrMETHODDATABUCKET \
                    SET bucket_data = %s \
                    WHERE id_bsrMETHOD = %s AND bucket_no = %s", \
                    (bucket_data.fastdump(), method_id, bucket_no, ))
            write_message("Updating bucket %s for method %s." %
                          (bucket_no, method_id), verbose=5)
def _get_year(self):
    """Return the year of publication.

    The year is formatted from ``self.record["publication_date"]``; an
    empty string is returned when the record has no such key.
    """
    if "publication_date" not in self.record:
        return ""
    return strftime("%Y", self.record["publication_date"])
def format_element(bfo, date_format='%d %B %Y', source_formats='%Y-%m-%d', source_fields="260__c", guess_source_format="no", ignore_date_format_for_year_only="yes"):
    """
    Prints the imprint publication date.

    Parameter <code>date_format</code> allows to specify the string
    representation of the output. The format string has the same
    behaviour as the strftime() function:
        <pre>Eg: 1982-09-24 07:32:00
             "%d %B %Y"   -> 24 September 1982
             "%I:%M"      -> 07:32
        </pre>

    Note that if input date is simply a year (4 digits), it is
    returned as such if <code>ignore_date_format_for_year_only</code>
    is set to 'yes', regardless of <code>date_format</code>.

    Parameter <code>source_formats</code> allows to specify the
    expected format of the date in the metadata. If the format does
    not match, the date cannot be parsed, and cannot be formatted
    according to <code>date_format</code>. Comma-separated values can
    be provided in order to test several input formats.

    Parameter <code>source_fields</code> defines the list of MARC
    fields where we would like to retrieve the date. First one
    matching <code>source_formats</code> is used. If none, fall back to
    first non-empty one.

    Parameter <code>guess_source_format</code> when set to 'yes'
    allows to guess the date source format.

    @see: pagination.py, publisher.py, reprints.py, imprint.py, place.py
    @param date_format: output date format.
    @param source_formats: expected (comma-separated values) input date format.
    @param source_fields: the MARC fields (comma-separated values) to look up
                   for the date. First non-empty one is used.
    @param guess_source_format: if 'yes', ignore 'source_format' and
                                try to guess format using Python mxDateTime module.
    @param ignore_date_format_for_year_only: if 'yes', ignore 'date_format' when the
                                             metadata in the record contains a single
                                             year (4 digits).
    @return: the formatted date, the bare year, the first non-empty raw
             field value when nothing could be parsed, or '' when every
             field is empty.
    """
    guess_source_format_p = guess_source_format.lower() == 'yes'
    source_marc_fields = [source_marc_field.strip() for source_marc_field in source_fields.split(',')]
    source_formats = [source_format.strip() for source_format in source_formats.split(',')]
    ignore_date_format_for_year_only_p = ignore_date_format_for_year_only.lower() == 'yes'
    parsed_datetime_value = None
    first_matched_raw_date = ''
    for source_marc_field in source_marc_fields:
        date_value = bfo.field(source_marc_field)
        if not date_value:
            continue
        if not first_matched_raw_date:
            # remembered as the fallback output when nothing parses
            first_matched_raw_date = date_value
        if ignore_date_format_for_year_only_p and \
               date_value.isdigit() and len(date_value) == 4:
            # Year. Return as such
            return date_value
        if guess_source_format_p:
            try:
                parsed_datetime_value = guess_datetime(date_value)
                break
            except Exception:
                # Guessing is best-effort and the exceptions raised by
                # guess_datetime() are not visible from here, so this stays
                # broad - but, unlike a bare except, it lets
                # KeyboardInterrupt/SystemExit through.
                pass
        else:
            for source_format in source_formats:
                try:
                    parsed_datetime_value = strptime(date_value, source_format)
                    break
                except (ValueError, TypeError):
                    # value does not match this format: try the next one
                    pass
        if parsed_datetime_value:
            # We have correctly parsed one date!
            break

    if parsed_datetime_value:
        return strftime(date_format, parsed_datetime_value)
    return first_matched_raw_date
def test_strftime_date_over_1900_object(self):
    """dateutils.strftime() formats a datetime.date object from 1908."""
    output_format = "%a, %d %b %Y %H:%M:%S +0000"
    day = datetime.date(1908, 3, 12)
    self.assertEqual("Thu, 12 Mar 1908 00:00:00 +0000",
                     dateutils.strftime(output_format, day))
def test_strftime_date_over_1900_object(self):
    """dateutils.strftime() formats a datetime.date object from 1908."""
    formatted = dateutils.strftime("%a, %d %b %Y %H:%M:%S +0000",
                                   datetime.date(1908, 3, 12))
    self.assertEqual("Thu, 12 Mar 1908 00:00:00 +0000", formatted)
def test_strftime_date_under_1900_object(self):
    """dateutils.strftime() formats a datetime.date object from 1765."""
    formatted = dateutils.strftime("%a, %d %b %Y %H:%M:%S +0000",
                                   datetime.date(1765, 1, 3))
    self.assertEqual("Thu, 03 Jan 1765 00:00:00 +0000", formatted)
def test_strftime_date_under_1900_object(self):
    """dateutils.strftime() formats a datetime.date object from 1765."""
    output_format = "%a, %d %b %Y %H:%M:%S +0000"
    day = datetime.date(1765, 1, 3)
    self.assertEqual("Thu, 03 Jan 1765 00:00:00 +0000",
                     dateutils.strftime(output_format, day))