def cli_clean_revisions(recid, dry_run=True, verbose=True):
    """Clean revisions of the given recid, by removing duplicate revisions
    that do not change the content of the record."""
    if recid == '*':
        recids = intbitset(run_sql("SELECT DISTINCT id_bibrec FROM hstRECORD"))
    else:
        try:
            recids = [int(recid)]
        except ValueError:
            print('ERROR: record ID must be integer, not %s.' % recid)
            sys.exit(1)
    for recid in recids:
        all_revisions = run_sql("SELECT marcxml, job_id, job_name, job_person, job_date FROM hstRECORD WHERE id_bibrec=%s ORDER BY job_date ASC", (recid,))
        previous_rec = {}
        deleted_revisions = 0
        for marcxml, job_id, job_name, job_person, job_date in all_revisions:
            try:
                current_rec = create_record(zlib.decompress(marcxml))[0]
            except Exception:
                print("ERROR: corrupted revisions found. Please run %s --fix-revisions '*'" % sys.argv[0], file=sys.stderr)
                sys.exit(1)
            if records_identical(current_rec, previous_rec):
                deleted_revisions += 1
                if not dry_run:
                    run_sql("DELETE FROM hstRECORD WHERE id_bibrec=%s AND job_id=%s AND job_name=%s AND job_person=%s AND job_date=%s", (recid, job_id, job_name, job_person, job_date))
            previous_rec = current_rec
        if verbose and deleted_revisions:
            print("record %s: deleted %s duplicate revisions out of %s" % (recid, deleted_revisions, len(all_revisions)))
    if verbose:
        print("DONE")
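# Usage sketch (illustrative, not part of the original module; the record ID
# 12345 is a hypothetical example value): preview the duplicate-revision
# cleanup before deleting anything, then run the destructive pass.
def _example_clean_revisions():
    # Dry run: only reports which revisions would be deleted.
    cli_clean_revisions('12345', dry_run=True, verbose=True)
    # Destructive pass, once the dry-run report looks sane.
    cli_clean_revisions('12345', dry_run=False, verbose=True)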
def repair_role_definitions():
    """ Try to rebuild compiled serialized definitions from their respective
    sources. This is needed in case Python breaks backward compatibility.
    """
    definitions = run_sql("SELECT id, firerole_def_src FROM accROLE")
    for role_id, firerole_def_src in definitions:
        run_sql("UPDATE accROLE SET firerole_def_ser=%s WHERE id=%s",
                (serialize(compile_role_definition(firerole_def_src)), role_id))
def cb_session_cleanup(data=None):
    """ Session cleanup procedure to be executed at the end of the
    request handling.
    """
    run_sql("""DELETE LOW_PRIORITY FROM session
               WHERE session_expiry <= UTC_TIMESTAMP()""")
def __import_records():
    """Import basket records into the bskREC table, in batches of 10000."""
    query1 = """SELECT bsk.id,
                       ubsk.id_user,
                       rec.id_record,
                       rec.nb_order,
                       DATE_FORMAT(ubsk.date_modification, '%Y-%m-%d %H:%i:%s')
                FROM basket bsk,
                     user_basket ubsk,
                     basket_record rec
                WHERE bsk.id=ubsk.id_basket
                AND bsk.id=rec.id_basket
                ORDER BY bsk.id"""
    records = run_sql(query1)

    def records_updater(record):
        (bskid, id_owner, id_record, order, date_modification) = record
        return "(%i,%i,%i,%i,'%s')" % (int(id_record), int(bskid), int(id_owner),
                                       int(order), real_escape_string(date_modification))

    query2 = """INSERT INTO bskREC (id_bibrec_or_bskEXTREC, id_bskBASKET,
                                    id_user_who_added_item, score, date_added)
                VALUES %s"""
    iterator = 0
    while iterator < len(records):
        temp_val = reduce(lambda x, y: x + ',' + y,
                          map(records_updater, records[iterator:iterator + 10000]))
        run_sql(query2 % temp_val)
        if iterator + 10000 <= len(records):
            last_rec = iterator + 10000
        else:
            last_rec = len(records)
        print("  Inserting records %i-%i out of %i" % (iterator, last_rec, len(records)))
        iterator = iterator + 10000
    return len(records)
def write_to_buckets_table(id_method, bucket_no, bucket_data, bucket_last_value,
                           update_timestamp=True):
    """Serialize the data and write it to the bsrMETHODDATABUCKET table."""
    write_message('Writing the data for bucket number %s for ' \
                  'method_id=%s to the database' \
                  % (bucket_no, id_method), verbose=5)
    write_message('Serializing data for bucket number %s' % bucket_no, verbose=5)
    serialized_bucket_data = bucket_data.fastdump()
    date = strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    if not update_timestamp:
        try:
            date = run_sql('SELECT last_updated from "bsrMETHODDATABUCKET" WHERE "id_bsrMETHOD" = %s and bucket_no = %s', \
                           (id_method, bucket_no))[0][0]
        except IndexError:
            pass  # keep the generated date
    try:
        write_message('Deleting old data.', verbose=5)
        run_sql("""DELETE FROM "bsrMETHODDATABUCKET"
                   WHERE "id_bsrMETHOD" = %s AND bucket_no = %s""", \
                (id_method, bucket_no, ))
        write_message('Inserting new data.', verbose=5)
        run_sql("""INSERT into "bsrMETHODDATABUCKET"
                   ("id_bsrMETHOD", bucket_no, bucket_data, bucket_last_value, last_updated)
                   VALUES (%s, %s, %s, %s, %s)""", \
                (id_method, bucket_no, serialized_bucket_data, bucket_last_value, date, ))
    except Error as err:
        write_message("The error [%s] occurred when inserting new bibsort data " \
                      "into the bsrMETHODDATABUCKET table" % err, sys.stderr)
        return False
    write_message('Writing to bsrMETHODDATABUCKET for ' \
                  'bucket number %s completed.' % bucket_no, verbose=5)
    return True
def _update_database_structure_post(logger):
    """Alter the existing database by removing columns; this is the step
    run after the modification."""
    logger.info("Removing unnecessary columns from tables")
    run_sql("ALTER TABLE bibdoc DROP COLUMN more_info")
def get_modified_or_inserted_recs(method_list):
    """Return a list of recids that have been inserted or modified since
    the last update of the bibsort methods in method_list.
    method_list should contain only methods that SHOULD be updated;
    if it contains new methods, an error will be thrown."""
    if not method_list:  # just to be on the safe side
        return 0
    try:
        query = """SELECT min(d.last_updated) from "bsrMETHODDATA" d,
                   "bsrMETHOD" m WHERE m.name in (%s) AND d."id_bsrMETHOD" = m.id""" % \
                ("%s," * len(method_list))[:-1]
        last_updated = str(run_sql(query, tuple(method_list))[0][0])
    except Error as err:
        write_message("Error when trying to get the last_updated date " \
                      "from bsrMETHODDATA: [%s]" % err, sys.stderr)
        return 0
    recids = []
    try:
        results = run_sql("SELECT id from bibrec \
                           where modification_date >= %s", (last_updated, ))
        if results:
            recids = [result[0] for result in results]
    except Error as err:
        write_message("Error when trying to get the list of " \
                      "modified records: [%s]" % err, sys.stderr)
        return 0
    return recids
def clean_bibxxx():
    """
    Clean unreferenced bibliographic values from bibXXx tables.
    This is useful to prettify browse results, as it removes
    old, no longer used values.

    WARNING: this function must be run only when no bibupload is
    running and/or sleeping.
    """
    write_message("""CLEANING OF UNREFERENCED bibXXx VALUES STARTED""")
    for xx in range(0, 100):
        bibxxx = 'bib%02dx' % xx
        bibrec_bibxxx = 'bibrec_bib%02dx' % xx
        if task_get_option('verbose') >= 9:
            num_unref_values = run_sql("""SELECT COUNT(*) FROM %(bibxxx)s
                    LEFT JOIN %(bibrec_bibxxx)s
                           ON %(bibxxx)s.id=%(bibrec_bibxxx)s.id_bibxxx
                    WHERE %(bibrec_bibxxx)s.id_bibrec IS NULL""" % \
                    {'bibxxx': bibxxx,
                     'bibrec_bibxxx': bibrec_bibxxx, })[0][0]
        run_sql("""DELETE %(bibxxx)s FROM %(bibxxx)s
                   LEFT JOIN %(bibrec_bibxxx)s
                          ON %(bibxxx)s.id=%(bibrec_bibxxx)s.id_bibxxx
                   WHERE %(bibrec_bibxxx)s.id_bibrec IS NULL""" % \
                {'bibxxx': bibxxx,
                 'bibrec_bibxxx': bibrec_bibxxx, })
        if task_get_option('verbose') >= 9:
            write_message(""" - %d unreferenced %s values cleaned""" % \
                          (num_unref_values, bibxxx))
    write_message("""CLEANING OF UNREFERENCED bibXXx VALUES FINISHED""")
def write_to_methoddata_table(id_method, data_dict, data_dict_ordered,
                              data_list_sorted, update_timestamp=True):
    """Serialize the data and write it to the bsrMETHODDATA table."""
    write_message('Starting serializing the data..', verbose=5)
    serialized_data_dict = serialize_via_marshal(data_dict)
    serialized_data_dict_ordered = serialize_via_marshal(data_dict_ordered)
    serialized_data_list_sorted = serialize_via_marshal(data_list_sorted)
    write_message('Serialization completed.', verbose=5)
    date = strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    if not update_timestamp:
        try:
            date = run_sql('SELECT last_updated from "bsrMETHODDATA" WHERE "id_bsrMETHOD" = %s', (id_method, ))[0][0]
        except IndexError:
            pass  # keep the generated date
    write_message("Starting writing the data for method_id=%s " \
                  "to the database (table bsrMETHODDATA)" % id_method, verbose=5)
    try:
        write_message('Deleting old data..', verbose=5)
        run_sql("""DELETE FROM "bsrMETHODDATA" WHERE "id_bsrMETHOD" = %s""", (id_method, ))
        write_message('Inserting new data..', verbose=5)
        run_sql("""INSERT into "bsrMETHODDATA"
                   ("id_bsrMETHOD", data_dict, data_dict_ordered, data_list_sorted, last_updated)
                   VALUES (%s, %s, %s, %s, %s)""", \
                (id_method, serialized_data_dict, serialized_data_dict_ordered, \
                 serialized_data_list_sorted, date, ))
    except Error as err:
        write_message("The error [%s] occurred when inserting new bibsort data " \
                      "into the bsrMETHODDATA table" % err, sys.stderr)
        return False
    write_message('Writing to the bsrMETHODDATA successfully completed.', \
                  verbose=5)
    return True
def acc_firerole_extract_emails(firerole_def_obj):
    """
    Best effort function to extract all the possible email addresses
    authorized by the given firerole.
    """
    authorized_emails = set()
    try:
        default_allow_p, rules = firerole_def_obj
        for (allow_p, not_p, field, expressions_list) in rules:  # for every rule
            if not_p:
                continue
            if field == 'group':
                for reg_p, expr in expressions_list:
                    if reg_p:
                        continue
                    if cfg['CFG_CERN_SITE'] and expr.endswith(' [CERN]'):
                        authorized_emails.add(expr[:-len(' [CERN]')].lower().strip() + '@cern.ch')
                    emails = run_sql("SELECT user.email FROM usergroup JOIN user_usergroup ON usergroup.id=user_usergroup.id_usergroup JOIN user ON user.id=user_usergroup.id_user WHERE usergroup.name=%s", (expr, ))
                    for email in emails:
                        authorized_emails.add(email[0].lower().strip())
            elif field == 'email':
                for reg_p, expr in expressions_list:
                    if reg_p:
                        continue
                    authorized_emails.add(expr.lower().strip())
            elif field == 'uid':
                for reg_p, expr in expressions_list:
                    if reg_p:
                        continue
                    email = run_sql("SELECT email FROM user WHERE id=%s", (expr, ))
                    if email:
                        authorized_emails.add(email[0][0].lower().strip())
        return authorized_emails
    except Exception as msg:
        raise InvenioWebAccessFireroleError(msg)
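# Illustrative firerole definition object, mirroring the structure unpacked
# by acc_firerole_extract_emails() above (hypothetical values, not a value
# deserialized from accROLE): a (default_allow_p, rules) pair, where each
# rule is (allow_p, not_p, field, expressions_list) and each expression is a
# (reg_p, expr) pair; regexp expressions (reg_p=True) are skipped.
_EXAMPLE_FIREROLE_DEF = (
    False,  # default_allow_p
    [
        (True, False, 'email', [(False, 'jane.doe@example.org')]),
        (True, False, 'group', [(False, 'editors')]),
    ],
)
# acc_firerole_extract_emails(_EXAMPLE_FIREROLE_DEF) would return
# 'jane.doe@example.org' plus the emails of the 'editors' group members
# found in the database.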
def do_upgrade():
    stmt = run_sql('SHOW CREATE TABLE crcILLREQUEST')[0][1]
    if '`overdue_letter_number` int(3)' not in stmt:
        run_sql("ALTER TABLE crcILLREQUEST ADD COLUMN overdue_letter_number int(3) unsigned NOT NULL default '0'")
    if '`overdue_letter_date` datetime' not in stmt:
        run_sql("ALTER TABLE crcILLREQUEST ADD COLUMN overdue_letter_date datetime NOT NULL default '0000-00-00 00:00:00'")
def mysql_info(separator=None, line_format=None):
    """Detect and print MySQL details.

    Useful for debugging problems on various OS.
    """
    from invenio.ext.sqlalchemy import db
    if db.engine.name != 'mysql':
        raise Exception('Database engine is not mysql.')

    from invenio.legacy.dbquery import run_sql
    out = []
    for key, val in run_sql("SHOW VARIABLES LIKE 'version%'") + \
            run_sql("SHOW VARIABLES LIKE 'charact%'") + \
            run_sql("SHOW VARIABLES LIKE 'collat%'"):
        if key in ['version',
                   'character_set_client',
                   'character_set_connection',
                   'character_set_database',
                   'character_set_results',
                   'character_set_server',
                   'character_set_system',
                   'collation_connection',
                   'collation_database',
                   'collation_server']:
            out.append((key, val))

    if separator is not None:
        if line_format is None:
            line_format = "%s: %s"
        return separator.join(map(lambda i: line_format % i, out))

    return dict(out)
def do_upgrade():
    run_sql("""CREATE TABLE IF NOT EXISTS schSTATUS (
        name varchar(50),
        value mediumblob,
        PRIMARY KEY (name)
    ) ENGINE=MyISAM
    """)
def is_method_valid(colID, rank_method_code):
    """
    Check if RANK_METHOD_CODE method is valid for the collection given.
    If colID is None, then check for existence regardless of collection.
    """
    if colID is None:
        return run_sql("SELECT COUNT(*) FROM rnkMETHOD WHERE name=%s",
                       (rank_method_code,))[0][0]

    enabled_colls = dict(run_sql("SELECT id_collection, score from collection_rnkMETHOD, rnkMETHOD WHERE id_rnkMETHOD=rnkMETHOD.id AND name=%s", (rank_method_code,)))

    try:
        colID = int(colID)
    except TypeError:
        return 0

    if colID in enabled_colls:
        return 1
    else:
        while colID:
            colID = run_sql("SELECT id_dad FROM collection_collection WHERE id_son=%s", (colID,))
            if colID and colID[0][0] in enabled_colls:
                return 1
            elif colID:
                colID = colID[0][0]
    return 0
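# Usage sketch (hypothetical collection id and method name): check whether
# the 'wrd' ranking method is enabled for collection 5, either directly or
# via one of its ancestor collections.
def _example_is_method_valid():
    return is_method_valid(5, 'wrd') == 1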
def update_user_inbox_for_reminders(uid):
    """
    Updates user's inbox with any reminders that should have arrived
    @param uid: user id
    @return: integer number of new expired reminders
    """
    now = convert_datestruct_to_datetext(localtime())
    reminder_status = CFG_WEBMESSAGE_STATUS_CODE["REMINDER"]
    new_status = CFG_WEBMESSAGE_STATUS_CODE["NEW"]
    query1 = """SELECT m.id
                FROM   msgMESSAGE m,
                       user_msgMESSAGE um
                WHERE  um.id_user_to=%s AND
                       um.id_msgMESSAGE=m.id AND
                       m.received_date<=%s AND
                       um.status like binary %s
                """
    params1 = (uid, now, reminder_status)
    res_ids = run_sql(query1, params1)
    out = len(res_ids)
    if out > 0:
        query2 = """UPDATE user_msgMESSAGE
                    SET    status=%s
                    WHERE  id_user_to=%s AND ("""
        query_params = [new_status, uid]
        for msg_id in res_ids[0:-1]:
            query2 += "id_msgMESSAGE=%s OR "
            query_params.append(msg_id[0])
        query2 += "id_msgMESSAGE=%s)"
        query_params.append(res_ids[-1][0])
        run_sql(query2, tuple(query_params))
    return out
def filter_out_based_on_date_range(recids, fromdate="", untildate="", set_spec=None):
    """ Filter out recids based on date range."""
    if fromdate:
        fromdate = normalize_date(fromdate, "T00:00:00Z")
    else:
        fromdate = get_earliest_datestamp()
    fromdate = utc_to_localtime(fromdate)

    if untildate:
        untildate = normalize_date(untildate, "T23:59:59Z")
    else:
        untildate = get_latest_datestamp()
    untildate = utc_to_localtime(untildate)

    if set_spec is not None:  ## either it has a value or it is empty, thus meaning all records
        last_updated = get_set_last_update(set_spec)
        if last_updated is not None:
            last_updated = utc_to_localtime(last_updated)
            if last_updated > fromdate:
                fromdate = utc_to_localtime(get_earliest_datestamp())

    recids = intbitset(recids)  ## Let's clone :-)

    if fromdate and untildate:
        recids &= intbitset(run_sql("SELECT id FROM bibrec WHERE modification_date BETWEEN %s AND %s", (fromdate, untildate)))
    elif fromdate:
        recids &= intbitset(run_sql("SELECT id FROM bibrec WHERE modification_date >= %s", (fromdate, )))
    elif untildate:
        recids &= intbitset(run_sql("SELECT id FROM bibrec WHERE modification_date <= %s", (untildate, )))

    if cfg.get('CFG_OAI_FILTER_RESTRICTED_RECORDS', True):
        recids = recids - get_all_restricted_recids()
    return recids
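# Usage sketch (hypothetical recids): restrict a candidate set to records
# modified during 2013; the function normalizes the OAI-style date bounds
# and intersects the set with matching bibrec modification dates.
def _example_filter_by_date():
    candidates = intbitset([1, 2, 3, 4])
    return filter_out_based_on_date_range(candidates,
                                          fromdate="2013-01-01",
                                          untildate="2013-12-31")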
def modify_translations(ID, langs, sel_type, trans, table):
    """add or modify translations in tables given by table
    ID - the id of the entry in the given table
    sel_type - the name type
    langs - the languages
    trans - the translations, in same order as in langs
    table - the table"""
    name = "name"
    if table[-1:].isupper():
        name = "NAME"

    try:
        for nr in range(len(langs)):
            res = run_sql("SELECT value FROM %s%s WHERE id_%s=%%s AND type=%%s AND ln=%%s" % (table, name, table),
                          (ID, sel_type, langs[nr][0]))
            if res:
                if trans[nr]:
                    res = run_sql("UPDATE %s%s SET value=%%s WHERE id_%s=%%s AND type=%%s AND ln=%%s" % (table, name, table),
                                  (trans[nr], ID, sel_type, langs[nr][0]))
                else:
                    res = run_sql("DELETE FROM %s%s WHERE id_%s=%%s AND type=%%s AND ln=%%s" % (table, name, table),
                                  (ID, sel_type, langs[nr][0]))
            else:
                if trans[nr]:
                    res = run_sql("INSERT INTO %s%s (id_%s, type, ln, value) VALUES (%%s,%%s,%%s,%%s)" % (table, name, table),
                                  (ID, sel_type, langs[nr][0], trans[nr]))
        return (1, "")
    except StandardError as e:
        return (0, e)
def check_if_need_to_delete_message_permanently(msg_ids):
    """
    Checks if a list of messages exist in anyone's inbox; if not,
    deletes them permanently
    @param msg_ids: sequence of message ids
    @return: number of deleted messages
    """
    if not ((type(msg_ids) is list) or (type(msg_ids) is tuple)):
        msg_ids = [msg_ids]
    query1 = """SELECT count(id_msgMESSAGE)
                FROM user_msgMESSAGE
                WHERE id_msgMESSAGE=%s"""
    messages_to_delete = []
    for msg_id in msg_ids:
        nb_users = int(run_sql(query1, (msg_id,))[0][0])
        if nb_users == 0:
            messages_to_delete.append(int(msg_id))
    if len(messages_to_delete) > 0:
        query2 = """DELETE FROM msgMESSAGE
                    WHERE"""
        params2 = []
        for msg_id in messages_to_delete[0:-1]:
            query2 += " id=%s OR"
            params2.append(msg_id)
        query2 += " id=%s"
        params2.append(messages_to_delete[-1])
        run_sql(query2, tuple(params2))
    return len(messages_to_delete)
def insert_new_group(uid, new_group_name, new_group_description, join_policy,
                     login_method='INTERNAL'):
    """Create a new group and affiliate a user."""
    query1 = """INSERT INTO usergroup (id, name, description, join_policy,
                   login_method)
                VALUES (NULL,%s,%s,%s,%s)
                """
    params1 = (new_group_name,
               new_group_description,
               join_policy,
               login_method)
    res1 = run_sql(query1, params1)

    date = convert_datestruct_to_datetext(localtime())
    uid = int(uid)
    query2 = """INSERT INTO user_usergroup (id_user, id_usergroup, user_status,
                   user_status_date)
                VALUES (%s,%s,'A',%s)
                """
    params2 = (uid, res1, date)
    run_sql(query2, params2)
    return res1
def format_element(bfo, display='day_distinct_ip_nb_views'):
    '''
    Prints record statistics

    @param display: the type of statistics displayed. Can be
    'total_nb_views', 'day_nb_views', 'total_distinct_ip_nb_views',
    'day_distinct_ip_nb_views' or 'total_distinct_ip_per_day_nb_views'
    '''
    if display == 'total_nb_views':
        return run_sql("""SELECT COUNT(client_host)
                          FROM rnkPAGEVIEWS
                          WHERE id_bibrec=%s""",
                       (bfo.recID,))[0][0]
    elif display == 'day_nb_views':
        return run_sql("""SELECT COUNT(client_host)
                          FROM rnkPAGEVIEWS
                          WHERE id_bibrec=%s AND DATE(view_time)=CURDATE()""",
                       (bfo.recID,))[0][0]
    elif display == 'total_distinct_ip_nb_views':
        return run_sql("""SELECT COUNT(DISTINCT client_host)
                          FROM rnkPAGEVIEWS
                          WHERE id_bibrec=%s""",
                       (bfo.recID,))[0][0]
    elif display == 'day_distinct_ip_nb_views':
        return run_sql("""SELECT COUNT(DISTINCT client_host)
                          FROM rnkPAGEVIEWS
                          WHERE id_bibrec=%s AND DATE(view_time)=CURDATE()""",
                       (bfo.recID,))[0][0]
    elif display == 'total_distinct_ip_per_day_nb_views':
        # Count the number of distinct IP addresses for every day, then
        # sum up. Similar to total_distinct_users_nb_views but assumes
        # that several different users can be behind a single IP
        # (which could change every day).
        res = run_sql("""SELECT COUNT(DISTINCT client_host)
                         FROM rnkPAGEVIEWS
                         WHERE id_bibrec=%s GROUP BY DATE(view_time)""",
                      (bfo.recID,))
        return sum([row[0] for row in res])
def do_upgrade():
    """Perform the upgrade from WorkflowsTaskResult to a simple dict."""

    class WorkflowsTaskResult(object):

        """The class to contain the current task results."""

        __module__ = os.path.splitext(os.path.basename(__file__))[0]

        def __init__(self, task_name, name, result):
            """Create a task result passing task_name, name and result."""
            self.task_name = task_name
            self.name = name
            self.result = result

        def to_dict(self):
            """Return a dictionary representing a full task result."""
            return {"name": self.name,
                    "task_name": self.task_name,
                    "result": self.result}

    from invenio.modules.workflows import utils
    utils.WorkflowsTaskResult = WorkflowsTaskResult

    all_data_objects = run_sql("SELECT id, _extra_data FROM bwlOBJECT")
    for object_id, _extra_data in all_data_objects:
        extra_data = cPickle.loads(base64.b64decode(_extra_data))
        if "_tasks_results" in extra_data:
            extra_data["_tasks_results"] = convert_to_dict(extra_data["_tasks_results"])
            _extra_data = base64.b64encode(cPickle.dumps(extra_data))
            run_sql("UPDATE bwlOBJECT set _extra_data=%s WHERE id=%s",
                    (_extra_data, str(object_id)))
def do_upgrade():
    run_sql("UPDATE collection SET name='Zenodo' WHERE name='ZENODO'")
    run_sql("UPDATE collectionname SET value='Zenodo' WHERE value='ZENODO'")
    run_sql("UPDATE collectionname SET value='Provisional: Zenodo' "
            "WHERE value='Provisional: ZENODO'")
    run_sql("UPDATE accARGUMENT SET value='Zenodo' WHERE value='ZENODO'")
    run_sql("UPDATE community SET title='Zenodo' WHERE id='zenodo'")
def detach_slave(connection=None):
    """Detach the slave."""
    if connection is None:
        connection = get_connection_for_dump_on_slave()
    # FIXME compatibility with postgresql
    run_sql("STOP SLAVE SQL_THREAD", connection=connection)
    check_slave_is_down(connection)
def attach_slave(connection=None):
    """Attach the slave."""
    if connection is None:
        connection = get_connection_for_dump_on_slave()
    # FIXME compatibility with postgresql
    run_sql("START SLAVE", connection=connection)
    check_slave_is_up(connection)
def send_message(uids_to, msgid, status=CFG_WEBMESSAGE_STATUS_CODE["NEW"]):
    """
    Send message to uids
    @param uids_to: sequence of user ids
    @param msgid: id of message
    @param status: status of the message (single char, see webmessage_config.py).
    @return: a list of users having their mailbox full
    """
    if not ((type(uids_to) is list) or (type(uids_to) is tuple)):
        uids_to = [uids_to]
    user_problem = []
    if len(uids_to) > 0:
        users_quotas = check_quota(CFG_WEBMESSAGE_MAX_NB_OF_MESSAGES - 1)
        query = """INSERT INTO user_msgMESSAGE
                      (id_user_to, id_msgMESSAGE, status)
                   VALUES """
        fixed_value = ",%s,%s)"
        query_params = []

        def not_users_quotas_has_key(key):
            """ not(is key in users over quota?)"""
            return not (key in users_quotas)

        user_ids_to = filter(not_users_quotas_has_key, uids_to)
        user_problem = filter(lambda uid: uid in users_quotas, uids_to)
        if len(user_ids_to) > 0:
            for uid_to in user_ids_to[0:-1]:
                query += "(%%s%s," % fixed_value
                query_params += [uid_to, msgid, status]
            query += "(%%s%s" % fixed_value
            query_params += [user_ids_to[-1], msgid, status]
            run_sql(query, tuple(query_params))
    return user_problem
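# Usage sketch (hypothetical user and message ids): deliver message 7 to
# three users; the return value lists the recipients whose mailbox was
# already over quota and therefore did not receive the message.
def _example_send_message():
    over_quota_uids = send_message([1, 2, 3], 7)
    return over_quota_uids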
def external_user_warning(uid):
    """
    Return 'email_auto_generated' if the email of the user is auto-generated.

    @param uid: user id
    @type uid: int

    @rtype: ''|'email_auto_generated'
    """
    from invenio.modules.access.local_config import CFG_TEMP_EMAIL_ADDRESS

    query = """
        SELECT      email
        FROM        user
        WHERE       id=%s
    """
    params = (uid, )
    email = run_sql(query, params)[0][0]

    regexp = re.compile(CFG_TEMP_EMAIL_ADDRESS % r"\w+", re.IGNORECASE)

    query = """
        SELECT      *
        FROM        userEXT
        WHERE       id_user=%s
    """

    if run_sql(query, params) and re.match(regexp, email):
        return 'email_auto_generated'

    return ''
def get_groupnames_like(uid, pattern):
    """Get groupnames like pattern. Will return only groups that user is
    allowed to see.
    """
    rlike_op = rlike()
    groups = {}
    if pattern:
        # For this use case external groups are like invisible ones
        query1 = (
            "SELECT id, name FROM usergroup WHERE name " + rlike_op +
            " %s AND join_policy like 'V%%' AND join_policy<>'VE'"
        )
        try:
            res = run_sql(query1, (pattern,))
        except OperationalError:
            res = ()
        # The line below inserts into the groups dictionary every tuple the
        # database returned, assuming field0=key and field1=value
        map(lambda x: groups.setdefault(x[0], x[1]), res)
        query2 = ("SELECT g.id, g.name FROM usergroup g, user_usergroup ug "
                  "WHERE g.id=ug.id_usergroup AND ug.id_user=%s AND g.name " +
                  rlike_op + " %s")
        try:
            res = run_sql(query2, (uid, pattern))
        except OperationalError:
            res = ()
        map(lambda x: groups.setdefault(x[0], x[1]), res)
    return groups
def add_oai_set(oai_set_name, oai_set_spec, oai_set_collection,
                oai_set_description, oai_set_p1, oai_set_f1, oai_set_m1,
                oai_set_p2, oai_set_f2, oai_set_m2, oai_set_p3, oai_set_f3,
                oai_set_m3, oai_set_op1, oai_set_op2):
    """Add a definition into the OAI Repository"""
    try:
        if not oai_set_spec:
            oai_set_spec = CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC
        set_definition = 'c=' + oai_set_collection + ';' + \
                         'p1=' + oai_set_p1 + ';' + \
                         'f1=' + oai_set_f1 + ';' + \
                         'm1=' + oai_set_m1 + ';' + \
                         'op1=' + oai_set_op1 + ';' + \
                         'p2=' + oai_set_p2 + ';' + \
                         'f2=' + oai_set_f2 + ';' + \
                         'm2=' + oai_set_m2 + ';' + \
                         'op2=' + oai_set_op2 + ';' + \
                         'p3=' + oai_set_p3 + ';' + \
                         'f3=' + oai_set_f3 + ';' + \
                         'm3=' + oai_set_m3 + ';'
        run_sql("""INSERT INTO oaiREPOSITORY (id, setName, setSpec,
                       setCollection, setDescription, setDefinition,
                       setRecList, p1, f1, m1, p2, f2, m2, p3, f3, m3)
                   VALUES (0, %s, %s, %s, %s, %s, NULL,
                           %s, %s, %s, %s, %s, %s, %s, %s, %s)""",
                (oai_set_name, oai_set_spec, oai_set_collection,
                 oai_set_description, set_definition, oai_set_p1, oai_set_f1,
                 oai_set_m1, oai_set_p2, oai_set_f2, oai_set_m2, oai_set_p3,
                 oai_set_f3, oai_set_m3))
        return (1, "")
    except StandardError as e:
        register_exception(alert_admin=True)
        return (0, e)
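# Illustrative call (hypothetical set values): the arguments below would be
# serialized into the setDefinition string
# 'c=Articles;p1=physics;f1=subject;m1=e;op1=a;p2=;f2=;m2=;op2=a;p3=;f3=;m3=;'
# before being inserted into oaiREPOSITORY.
def _example_add_oai_set():
    return add_oai_set('Physics articles', 'physics', 'Articles', '',
                       'physics', 'subject', 'e',
                       '', '', '',
                       '', '', '',
                       'a', 'a')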
def do_upgrade():
    all_tables = [t[0] for t in run_sql("SHOW TABLES LIKE 'idx%R'")] + \
                 [t[0] for t in run_sql("SHOW TABLES LIKE 'rnk%R'")]
    for table in all_tables:
        create_statement = run_sql('SHOW CREATE TABLE %s' % table)[0][1]
        if 'KEY `type`' not in create_statement:
            run_sql("ALTER TABLE %s ADD INDEX type (type)" % (table,))
def do_upgrade():
    """Upgrade recipe procedure."""
    if not run_sql("SELECT id FROM format WHERE code='recjson'"):
        run_sql("INSERT INTO format "
                "(name,code,description,content_type,visibility) "
                "VALUES ('recjson','recjson', 'recjson record representation',"
                "'application/json', 0)")
def detach_col_rnk(rnkID, colID):
    """detach rank method from collection

    rnkID - id from rnkMETHOD table
    colID - id of collection, as in collection table
    """
    try:
        run_sql("DELETE FROM collection_rnkMETHOD WHERE id_collection=%s AND id_rnkMETHOD=%s",
                (colID, rnkID))
        return (1, "")
    except StandardError as e:
        return (0, e)
def insert_into_missing(recid, report):
    """Mark reference string as missing.

    If a reference is a report number / journal / DOI but we do not have
    the corresponding record in the database, we mark that particular
    reference string as missing, by adding a row in rnkCITATIONDATAEXT.
    The recid represents the record containing the reference string.
    """
    if len(report) >= 255:
        # Invalid report, it is too long
        # and does not fit in the database column
        # (currently varchar 255)
        return
    wasalready = run_sql("""SELECT id_bibrec
                            FROM rnkCITATIONDATAEXT
                            WHERE id_bibrec = %s
                            AND extcitepubinfo = %s""",
                         (recid, report))
    if not wasalready:
        run_sql("""INSERT INTO rnkCITATIONDATAEXT(id_bibrec, extcitepubinfo)
                   VALUES (%s,%s)""",
                (recid, report))
def get_bibrankmethod_lastupdate(rank_method_code):
    """Return the last execution date of a bibrank method."""
    query = """SELECT DATE_FORMAT(last_updated, '%%Y-%%m-%%d %%H:%%i:%%s')
               FROM rnkMETHOD WHERE name =%s"""
    last_update_time = run_sql(query, [rank_method_code])
    try:
        r = last_update_time[0][0]
    except IndexError:
        r = "0000-00-00 00:00:00"
    return r
def get_uid_from_email(email):
    """Return the uid corresponding to an email.
    Return -1 when the email does not exist."""
    try:
        res = run_sql("SELECT id FROM user WHERE email=%s", (email, ))
        if res:
            return res[0][0]
        else:
            return -1
    except OperationalError:
        register_exception()
        return -1
def clean_documents():
    """Delete all the bibdocs that have been set as deleted and have not
    been modified since CFG_DELETED_BIBDOC_MAXLIFE days. Returns the number
    of bibdocs involved."""
    write_message("""CLEANING OF OBSOLETED DELETED DOCUMENTS STARTED""")
    write_message("select id from bibdoc where status='DELETED' and NOW()>ADDTIME(modification_date, '%s 0:0:0')" % CFG_DELETED_BIBDOC_MAXLIFE, verbose=9)
    records = run_sql("select id from bibdoc where status='DELETED' and NOW()>ADDTIME(modification_date, '%s 0:0:0')",
                      (CFG_DELETED_BIBDOC_MAXLIFE, ))
    for record in records:
        bibdoc = BibDoc.create_instance(record[0])
        bibdoc.expunge()
        write_message("DELETE FROM bibdoc WHERE id=%i" % int(record[0]), verbose=9)
        run_sql("DELETE FROM bibdoc WHERE id=%s", (record[0], ))
    write_message("""%s obsoleted deleted documents cleaned""" % len(records))
    write_message("""CLEANING OF OBSOLETED DELETED DOCUMENTS FINISHED""")
    return len(records)
def compute_refs_diff(recid, new_refs):
    """
    Given a set of references for a record, returns how many references
    were added to it. The value can be negative which means the record
    lost citations.
    """
    old_refs = set(row[0] for row in run_sql("""SELECT citee
                                                FROM rnkCITATIONDICT
                                                WHERE citer = %s""", [recid]))
    refs_to_add = new_refs - old_refs
    refs_to_delete = old_refs - new_refs
    return len(refs_to_add) - len(refs_to_delete)
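# Worked example (hypothetical citer/citee ids): if rnkCITATIONDICT currently
# stores citees {1, 2, 3} for citer 42 and the new reference set is
# {2, 3, 4, 5}, then refs_to_add = {4, 5} and refs_to_delete = {1}, so
# compute_refs_diff(42, set([2, 3, 4, 5])) returns 2 - 1 == 1.
def _example_refs_diff():
    return compute_refs_diff(42, set([2, 3, 4, 5]))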
def get_nickname_or_email(uid):
    """Return nickname (preferred) or the email address of the user uid.
    Return string 'guest' in case the user is not found."""
    out = "guest"
    res = run_sql("""SELECT nickname, email FROM "user" WHERE id=%s""",
                  (uid, ), 1)
    if res and res[0]:
        if res[0][0]:
            out = res[0][0]
        elif res[0][1]:
            out = res[0][1].lower()
    return out
def get_records_range_for_index(index_id):
    """
    Get records range for given index.
    """
    try:
        query = """SELECT min(id_bibrec), max(id_bibrec) FROM
                   idxWORD%02dR""" % index_id
        resp = run_sql(query)
        if resp:
            return resp[0]
        return None
    except Exception:
        return None
def get_index_remove_html_markup(index_id):
    """ Gets the remove_html_markup parameter from the database ('Yes' or 'No')
        and changes it to True or False, for consistency with WordTable."""
    try:
        result = run_sql("SELECT remove_html_markup FROM idxINDEX WHERE ID=%s",
                         (index_id, ))[0][0]
    except Exception:
        return False
    if result == 'Yes':
        return True
    return False
def get_url_list(list_type):
    """
    @param list_type: of CFG_WEBLINKBACK_LIST_TYPE
    @return (url0, ..., urln) in ascending order by url
    """
    result = run_sql("""SELECT url
                        FROM lnkADMINURL
                        WHERE list=%s
                        ORDER by url ASC
                     """, (list_type, ))
    return tuple(url[0] for url in result)
def get_field(recid, field):
    """
    Gets list of field 'field' for the record with 'recid' system number.
    """
    digit = field[0:2]
    bibbx = "bib%sx" % digit
    bibx = "bibrec_bib%sx" % digit
    query = "SELECT bx.value FROM %s AS bx, %s AS bibx " \
            "WHERE bibx.id_bibrec=%%s AND bx.id=bibx.id_bibxxx AND bx.tag=%%s" \
            % (wash_table_column_name(bibbx), wash_table_column_name(bibx))
    return [row[0] for row in run_sql(query, (recid, field))]
def filter_downloads_per_hour_with_docid(keys, last_updated):
    """filter all the duplicate downloads per user for each hour interval"""
    for k in keys:
        id_bibdocs = run_sql("select distinct id_bibdoc from rnkDOWNLOADS where id_bibrec=%s", (k, ))
        for bibdoc in id_bibdocs:
            values = run_sql("""select DATE_FORMAT(download_time,"%%Y-%%m-%%d %%H"), client_host
                                from rnkDOWNLOADS
                                where id_bibrec=%s and id_bibdoc=%s and download_time>=%s""",
                             (k, bibdoc[0], last_updated))
            for val in values:
                date_res = val[0]
                date1 = "%s:00:00" % (date_res, )
                date2 = compute_next_hour(date_res)
                duplicates = run_sql("select count(*) from rnkDOWNLOADS where id_bibrec=%s and id_bibdoc=%s and download_time>=%s and download_time<%s and client_host=%s",
                                     (k, bibdoc[0], date1, date2, val[1]))[0][0] - 1
                run_sql("delete from rnkDOWNLOADS where id_bibrec=%s and id_bibdoc=%s and download_time>=%s and download_time<%s and client_host=%s limit %s",
                        (k, bibdoc[0], date1, date2, val[1], duplicates))
def query2list(query, table_name):
    """Given a SQL query return a list of dictionaries with the results"""
    results = run_sql(query, with_desc=True)
    lst_results = []
    for result in results[0]:
        dict_results = {}
        for index, field in enumerate(results[1]):
            if not is_blacklisted(table_name, field[0]):
                dict_results[field[0]] = result[index]
        lst_results.append(dict_results)
    return lst_results
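# Usage sketch (hypothetical query and table name): run_sql(..., with_desc=True)
# returns a (rows, description) pair; query2list() zips each row with the
# column names from the description, skipping blacklisted fields.
def _example_query2list():
    # Would yield e.g. [{'id': 1, 'name': 'Articles'}, ...], minus any
    # blacklisted columns for the 'collection' table.
    return query2list("SELECT id, name FROM collection", "collection")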
def get_latest_datestamp():
    """Get the latest datestamp in the database, in UTC.
    Return CFG_MAX_DATE if there are no records.
    """
    out = CFG_MAX_DATE
    res = run_sql("SELECT " + datetime_format('MAX(modification_date)', False) +
                  " FROM bibrec", n=1)
    if res and res[0][0]:
        out = localtime_to_utc(res[0][0])
    return out
def get_data_for_definition_bibrec(column_name, recids_copy):
    '''Having a column_name and a list of recids, it returns a dictionary
    mapping each recid to its corresponding value from that column'''
    dict_column = {}
    for recid in recids_copy:
        creation_date = run_sql('SELECT %s from bibrec WHERE id = %%s' % column_name,
                                (recid, ))[0][0]
        new_creation_date = datetime(creation_date.year, creation_date.month,
                                     creation_date.day, creation_date.hour,
                                     creation_date.minute, creation_date.second)
        dict_column[recid] = new_creation_date.strftime('%Y%m%d%H%M%S')
    return dict_column
def fetch_auto_mode(self):
    # If the daemon is not running at all, we are in manual mode
    if not server_pid():
        status = 0
    else:
        # Otherwise check the daemon status
        r = run_sql("""SELECT value FROM "schSTATUS" WHERE name = 'auto_mode'""")
        try:
            status = int(r[0][0])
        except (ValueError, IndexError):
            status = 0
    return status
def post_upgrade():
    """Check for potentially invalid revisions"""
    res = run_sql("""SELECT DISTINCT(id_bibrec) FROM hstRECORD
                     WHERE CHAR_LENGTH(marcxml) = %s""", [2**16 - 1])
    if res:
        warnings.warn("You have %s records with potentially corrupt history revisions!" % len(res))
        warnings.warn("You may want to run the following:")
        for row in res:
            warnings.warn("bibedit --fix-revisions %s" % row[0])
def get_nicknames_like(pattern):
    """get nicknames like pattern"""
    rlike_op = rlike()
    if pattern:
        try:
            res = run_sql("""SELECT nickname FROM "user" WHERE nickname """ +
                          rlike_op + """ %s""", (pattern, ))
        except OperationalError:
            res = ()
        return res
    return ()
def attach_col_rnk(rnkID, colID):
    """attach rank method to collection

    rnkID - id from rnkMETHOD table
    colID - id of collection, as in collection table
    """
    try:
        run_sql("INSERT INTO collection_rnkMETHOD(id_collection, id_rnkMETHOD) VALUES (%s,%s)",
                (colID, rnkID))
        return (1, "")
    except StandardError as e:
        return (0, e)
def bst_openaire_pgreplayqueue():
    """
    Execute a failed D-NET query. See
    invenio.dnetutils.dnet_save_query_into_pgreplayqueue for further info.
    """
    replayqueue = intbitset(run_sql("SELECT id FROM pgreplayqueue"))
    for queryid in replayqueue:
        query, param = loads(decompress(run_sql("SELECT query FROM pgreplayqueue WHERE id=%s", (queryid, ))[0][0]))
        try:
            dnet_run_sql(query, param, support_replay=False)
        except:
            ## Mmh... things are still not working. Better give up now!
            try:
                run_sql("UPDATE pgreplayqueue SET last_try=NOW() WHERE id=%s", (queryid, ))
            except:
                ## We are not really interested in this particular error.
                register_exception(alert_admin=True)
            raise
        else:
            run_sql("DELETE FROM pgreplayqueue WHERE id=%s", (queryid, ))
def create_linkback(origin_url, recid, additional_properties, linkback_type, user_info):
    """
    Create linkback
    @param origin_url: origin URL
    @param recid: recid
    @param additional_properties: additional properties
    @param linkback_type: linkback type
    @param user_info: user info
    @return: id of the created linkback
    """
    linkbackid = run_sql("""INSERT INTO lnkENTRY (origin_url, id_bibrec, additional_properties, type, status, insert_time)
                            VALUES (%s, %s, %s, %s, %s, NOW());
                            SELECT LAST_INSERT_ID();
                         """, (origin_url, recid, str(additional_properties),
                               linkback_type, CFG_WEBLINKBACK_STATUS['PENDING']))

    logid = run_sql("""INSERT INTO lnkLOG (id_user, action, log_time)
                       VALUES (%s, %s, NOW());
                       SELECT LAST_INSERT_ID();
                    """, (user_info['uid'], CFG_WEBLINKBACK_STATUS['INSERTED']))

    run_sql("""INSERT INTO lnkENTRYLOG (id_lnkENTRY, id_lnkLOG)
               VALUES (%s, %s);
            """, (linkbackid, logid))

    # add url title entry if necessary
    if len(run_sql("""SELECT url
                      FROM lnkENTRYURLTITLE
                      WHERE url=%s
                   """, (origin_url, ))) == 0:
        manual_set_title = 0
        title = ""
        if additional_properties != "" and 'title' in additional_properties.keys():
            manual_set_title = 1
            title = additional_properties['title']

        run_sql("""INSERT INTO lnkENTRYURLTITLE (url, title, manual_set)
                   VALUES (%s, %s, %s)
                """, (origin_url, title, manual_set_title))

    return linkbackid
def migrate_comments_populate_threads_index():
    """
    Fill in the `reply_order_cached_data' columns in cmtRECORDCOMMENT and
    bskRECORDCOMMENT tables with adequate values so that threads are
    displayed correctly.
    """
    # Update WebComment comments
    res = run_sql("""SELECT id FROM "cmtRECORDCOMMENT"
                     WHERE reply_order_cached_data is NULL""")
    for row in res:
        reply_order_cached_data = get_reply_order_cache_data(row[0])
        run_sql("""UPDATE "cmtRECORDCOMMENT"
                   SET reply_order_cached_data=%s
                   WHERE id=%s""",
                (reply_order_cached_data, row[0]))

    # Update WebBasket comments
    res = run_sql("""SELECT id FROM "bskRECORDCOMMENT"
                     WHERE reply_order_cached_data is NULL""")
    for row in res:
        reply_order_cached_data = get_reply_order_cache_data(row[0])
        run_sql("""UPDATE "bskRECORDCOMMENT"
                   SET reply_order_cached_data=%s
                   WHERE id=%s""",
                (reply_order_cached_data, row[0]))
def remove_url(url):
    """
    Remove a URL from list
    @param url: unique URL string for all lists
    """
    # get ids
    urlid = run_sql("""SELECT id
                       FROM lnkADMINURL
                       WHERE url=%s
                    """, (url, ))[0][0]
    logids = run_sql("""SELECT log.id
                        FROM lnkLOG log
                        JOIN lnkADMINURLLOG url_log
                          ON log.id=url_log.id_lnkLOG
                        WHERE url_log.id_lnkADMINURL=%s
                     """, (urlid, ))
    # delete url and url log
    run_sql("""DELETE FROM lnkADMINURL
               WHERE id=%s;
               DELETE FROM lnkADMINURLLOG
               WHERE id_lnkADMINURL=%s
            """, (urlid, urlid))
    # delete log
    for logid in logids:
        run_sql("""DELETE FROM lnkLOG
                   WHERE id=%s
                """, (logid[0], ))
def do_upgrade():
    """Implement your upgrades here."""
    run_sql("INSERT INTO collection (name, dbquery) VALUES (%s, %s)",
            ('proposal', '980__b:proposal'))
    coll_id = run_sql("SELECT id FROM collection WHERE name='proposal'")[0][0]
    run_sql("INSERT INTO collectiondetailedrecordpagetabs (id_collection, tabs) "
            "VALUES (%s, 'usage;comments;metadata;files')", (coll_id, ))
    run_sql("INSERT INTO collection_collection (id_dad, id_son, type, score) "
            "VALUES (%s, %s, %s, %s)", (2, coll_id, 'r', coll_id))
    run_sql("INSERT INTO collectionname (id_collection, ln, type, value) "
            "VALUES (%s, %s, %s, %s)", (coll_id, 'en', 'ln', 'Proposal'))
def getUid(req):
    """Return user ID taking it from the cookie of the request.
       Includes control mechanism for the guest users, inserting in
       the database table when need be, raising the cookie back to the
       client.

       User ID is set to 0 when client refuses cookie or we are in the
       read-only site operation mode.

       User ID is set to -1 when we are in the permission denied site
       operation mode.

       getUid(req) -> userId
    """
    #if hasattr(req, '_user_info'):
    #    return req._user_info['_uid']
    if CFG_ACCESS_CONTROL_LEVEL_SITE == 1:
        return 0
    if CFG_ACCESS_CONTROL_LEVEL_SITE == 2:
        return -1

    guest = 0
    from flask import session
    uid = session.uid
    if not session.need_https:
        if uid == -1:  # first time, so create a guest user
            if CFG_ACCESS_CONTROL_LEVEL_GUESTS == 0:
                session['uid'] = 0
                session.set_remember_me(False)
                return 0
            else:
                return -1
        else:
            if not hasattr(req, '_user_info') and 'user_info' in session:
                req._user_info = session['user_info']
                req._user_info = collect_user_info(req, refresh=True)

    if guest == 0:
        guest = isGuestUser(uid)

    if guest:
        if CFG_ACCESS_CONTROL_LEVEL_GUESTS == 0:
            return uid
        elif CFG_ACCESS_CONTROL_LEVEL_GUESTS >= 1:
            return -1
    else:
        res = run_sql("SELECT note FROM user WHERE id=%s", (uid,))
        if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS == 0:
            return uid
        elif CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 1 and res and res[0][0] in [1, "1"]:
            return uid
        else:
            return -1
def check_slave_is_in_consistent_state(connection=None):
    """
    Check if the slave is already aware that dbdump task is running.
    dbdump being a monotask, guarantee that no other task is currently
    running and it's hence safe to detach the slave and start the
    actual dump.
    """
    if connection is None:
        connection = get_connection_for_dump_on_slave()
    i = 0
    ## Let's take the current status of dbdump (e.g. RUNNING, ABOUT TO STOP, etc.)...
    current_status = run_sql("""SELECT status FROM "schTASK" WHERE id=%s""",
                             (task_get_task_param('task_id'), ))[0][0]
    while True:
        if i == 10:
            ## Timeout!!
            raise StandardError("The slave seems not to be catching up with the master")
        ## ...and let's see if it matches with what the slave sees.
        if run_sql("""SELECT status FROM "schTASK" WHERE id=%s AND status=%s""",
                   (task_get_task_param('task_id'), current_status),
                   connection=connection):
            ## Bingo!
            return
        time.sleep(3)
        i += 1
def _detect_jobs_to_run(string_of_jobnames=None):
    """Detect which jobs to run from optional string of jobs.
       If not passed, run all jobs.
       Return list of jobnames to run."""
    if string_of_jobnames:
        jobnames = string_of_jobnames.split(',')
    else:
        jobnames = []
        # FIXME: pay attention to periodicity; extract only jobs needed to run
        res = run_sql("""SELECT jobname FROM "expJOB" """)
        for row in res:
            jobnames.append(row[0])
    return jobnames
def do_upgrade():
    """Implement your upgrades here."""
    op.add_column('format',
                  db.Column('mime_type', db.String(length=255),
                            unique=True, nullable=True))
    mime_type_dict = dict(
        xm='application/marcxml+xml',
        hm='application/marc',
        recjson='application/json',
        hx='application/x-bibtex',
        xn='application/x-nlm',
    )
    query = "UPDATE format SET mime_type=%s WHERE code=%s"
    for code, mime in mime_type_dict.items():
        params = (mime, code)
        try:
            run_sql(query, params)
        except Exception as e:
            warnings.warn("Failed to execute query {0}: {1}".format(query, e))
def get_user_collections(req):
    """
    Return collections for which the user is moderator
    """
    user_info = collect_user_info(req)
    res = []
    collections = run_sql('SELECT name FROM collection')
    for collection in collections:
        collection_emails = acc_get_authorized_emails('moderatecomments',
                                                      collection=collection[0])
        if user_info['email'] in collection_emails or isUserAdmin(user_info):
            res.append(collection[0])
    return res
def register_page_view_event(recid, uid, client_ip_address):
    """Register Detailed record page view event for record RECID
       consulted by user UID from machine CLIENT_HOST_IP.
       To be called by the search engine.
    """
    if CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        # do not register access if we are in read-only access control
        # site mode:
        return []
    return run_sql("INSERT INTO rnkPAGEVIEWS "
                   " (id_bibrec,id_user,client_host,view_time) "
                   " VALUES (%s,%s,INET_ATON(%s),NOW())",
                   (recid, uid, client_ip_address))