def do_upgrade():
    """Add the ``m_name`` column to aidPERSONIDPAPERS and populate it.

    The column and its ``m_name-b`` index are created first. Then, for every
    distinct (bibref_table, bibref_value) pair in the table, the name is
    looked up with ``get_name_by_bibref`` and both ``name`` and the derived
    ``m_name`` (``create_matchable_name``) are written back.

    A bibref whose lookup fails with the "A bibref must have exactly one
    name" assertion is not updated; its bibref value is collected in
    ``records_for_rabbit`` instead. Any other AssertionError propagates.
    """
    logger = Logger("Rabbit m_name upgrade script")
    warnings.filterwarnings('ignore')

    run_sql(
        "alter table aidPERSONIDPAPERS add `m_name` VARCHAR(255) not null after name"
    )
    run_sql("alter table aidPERSONIDPAPERS add INDEX `m_name-b` (`m_name`)")

    # A set, so each (table, value) pair is processed once even if it
    # appears on several rows.
    present_bibrefs = set(
        run_sql("select bibref_table, bibref_value from aidPERSONIDPAPERS"))
    total_updates = len(present_bibrefs)

    # NOTE(review): filled below but not consumed anywhere in the visible
    # code -- confirm whether a follow-up step is expected to use it.
    records_for_rabbit = set()

    for i, bibref in enumerate(present_bibrefs):
        logger.update_status(
            float(i) / total_updates,
            '%s out of %s (%s)' % (str(i), str(total_updates), str(bibref)))
        try:
            name = get_name_by_bibref(bibref)
        except AssertionError as error:
            # Same value the deprecated ``error.message`` attribute held:
            # the sole argument, or an empty string otherwise.
            reason = error.args[0] if len(error.args) == 1 else ""
            if "A bibref must have exactly one name" not in reason:
                # Bare raise keeps the original traceback.
                raise
            records_for_rabbit.add(bibref[1])
        else:
            m_name = create_matchable_name(name)
            run_sql(
                "update aidPERSONIDPAPERS set name=%s, m_name=%s where bibref_table=%s "
                "and bibref_value=%s ",
                (name, m_name, bibref[0], bibref[1]))
def show_papers(personid, external_id=None, orcid=None, inspire=None):
    """List a person's papers whose author fields carry ``external_id``.

    For each aidPERSONIDPAPERS row of ``personid`` with ``flag > -2``, the
    author field of the record whose ``100__a``/``700__a`` value equals the
    signature's name is located. The ``a``, ``i``, ``j``, ``k`` and ``m``
    subfields at that position are then compared with the given ids.

    :param personid: id of the person whose papers are inspected
    :param external_id: value a subfield must equal once any ``ORCID:``
        text has been removed from it
    :param orcid: value the stripped subfield must differ from
    :param inspire: value the raw subfield must differ from
    :return: a string with one "bibrec author value" line per matching
        subfield, or None when nothing matched
    """
    # Bind the id as a query parameter instead of splicing it into the SQL.
    rows = run_sql(
        'select * from aidPERSONIDPAPERS where personid=%s and flag>-2',
        (personid,))

    matches = []
    # Unpacking all nine columns is kept so a row of unexpected width still
    # fails loudly; only table, bibref, bibrec and author are used.
    for _pid, table, bibref, bibrec, author, _match, _flag, _cul, _date \
            in rows:
        author_name = get_name_by_bibref((table, bibref))
        # Fetch and scan the record once; both passes reuse these fields.
        fields = list(AmendableRecord(get_bibrecord(bibrec)).iterfields(
            ['{0}__%'.format(table)]))

        # Position of the author field carrying this signature's name
        # (the last one wins if several match).
        position = -1
        for key, value in fields:
            if key[0] in ('700__a', '100__a') and value == author_name:
                position = key[1]
        if position < 0:
            continue

        wanted_tags = tuple('{0}__{1}'.format(table, code)
                            for code in ('a', 'i', 'j', 'k', 'm'))
        for key, value in fields:
            if key[1] != position or key[0] not in wanted_tags:
                continue
            stripped = value.replace('ORCID:', '')
            if stripped == external_id and stripped != orcid and \
                    value != inspire:
                matches.append(
                    " " + " ".join([str(bibrec), author, value, '\n']))

    return "".join(matches) or None
def export_to_dot(cs, fname, graph_info, extra_edge=None):
    """Write a DOT description of a cluster set to the file ``fname``.

    The output contains one black node per entry of ``graph_info``, an
    optional green edge for ``extra_edge``, a blue chain joining the bibs
    of every cluster, and a red edge from each bib of a cluster to each bib
    of the clusters listed in its ``hate`` attribute.

    :param cs: object whose ``clusters`` each expose ``bibs`` and ``hate``
    :param fname: path of the file to (over)write
    :param graph_info: iterable; only its length/indices are used here
    :param extra_edge: optional ``(v1, v2, (prob, cert))`` tuple
    """
    from invenio.bibauthorid_dbinterface import get_name_by_bibref

    # The context manager guarantees the file is flushed and closed, also
    # when one of the writes or lookups raises.
    with open(fname, "w") as fptr:
        fptr.write("graph wedgy {\n")
        fptr.write(" overlap=prism\n")

        # NOTE(review): the enumerate index, not the graph_info entry, is
        # passed to get_name_by_bibref -- confirm this is intended.
        for idx, _bib in enumerate(graph_info):
            fptr.write(' %d [color=black label="%s"];\n'
                       % (idx, get_name_by_bibref(idx)))

        if extra_edge:
            v1, v2, (prob, cert) = extra_edge
            fptr.write(' %d -- %d [color=green label="p: %.2f, c: %.2f"];\n'
                       % (v1, v2, prob, cert))

        for clus in cs.clusters:
            fptr.write(" %s [color=blue];\n"
                       % " -- ".join(str(x) for x in clus.bibs))
            fptr.write("".join(" %d -- %d [color=red]\n" % (b1, b2)
                               for b1 in clus.bibs
                               for h in clus.hate
                               for b2 in h.bibs))

        fptr.write("}")
def process_authors(pids):
    """Store the affiliated records of the given authors.

    Looks up every non-rejected (``flag > -2``) paper association of the
    authors in ``pids``, resolves the name behind each bibref and the
    affiliations of each author, and writes the associations that have
    affiliations to ``bibEDITAFFILIATIONS_tmp``.

    :param pids: author (person) ids to process
    :return: length of the flat value list handed to ``populate_table``
        (six values per kept association); 0 when nothing was stored
    """
    pids = list(pids)
    if not pids:
        # An empty id list would produce ``personid in (  )``, which the
        # database rejects; there is nothing to process anyway.
        return 0

    # get the author - paper associations (excluding rejected papers);
    # ids are bound as string parameters rather than spliced into the SQL
    placeholders = ' , '.join(['%s'] * len(pids))
    associations = set(run_sql(
        "select personid, bibref_table, bibref_value, bibrec "
        "from aidPERSONIDPAPERS "
        "where flag > -2 and personid in ( " + placeholders + " )",
        tuple(str(p) for p in pids)))

    # get mapping of: (bibref_table, bibref_value) -> name
    bibref_to_name = {}
    bibrefs = set((table, ref) for _, table, ref, _ in associations)
    for bibref in bibrefs:
        bibref_to_name[bibref] = get_name_by_bibref(bibref)
        write_message('Got name: %s for bibref: %s.' %
                      (bibref_to_name[bibref], str(bibref)))

    # get mapping of: author -> affiliations
    pid_to_affiliations = {}
    for pid in pids:
        pid_to_affiliations[pid] = get_affiliations_for_author(pid)
        write_message('Got affiliations: %s for author: %s.' %
                      (pid_to_affiliations[pid], str(pid)))

    # Flat list of values, six per kept association:
    # rec, name, affiliations, table, ref, year.
    # NOTE(review): presumably populate_table regroups the flat list by the
    # number of column names -- verify against its definition.
    affiliated_records = []
    for pid, table, ref, rec in associations:
        affiliations, year = pid_to_affiliations[pid]
        # we don't want to keep records which have no affiliation/s
        if not affiliations:
            continue
        affiliated_records.extend((
            rec, bibref_to_name[(table, ref)], serialize(affiliations),
            table, ref, year
        ))

    # flush data to db
    if affiliated_records:
        # NOTE(review): this length counts values, not associations.
        write_message('Populating table with %s records.' %
                      len(affiliated_records))
        populate_table('bibEDITAFFILIATIONS_tmp', [
            'bibrec', 'name', 'affiliations', 'bibref_table', 'bibref_value',
            'year'
        ], affiliated_records, empty_table_first=False)

    return len(affiliated_records)
def get_message_body(self):
    """Build the text reported by this exception, one item per line."""
    # Prefer the canonical name for the profile link; fall back to the
    # raw person id when no canonical name is returned.
    try:
        profile_key = get_canonical_name_of_author(self.pid)[0]
    except IndexError:
        profile_key = self.pid
    profile_url = "%s/author/profile/%s" % (CFG_SITE_URL, profile_key)

    # Name behind the signature that was about to be moved.
    moved_name = get_name_by_bibref(self.signature[0:2])

    # Each already-present signature paired with its name.
    present = []
    for signature in self.present_signatures:
        present.append((signature, get_name_by_bibref(signature[0:2])))
    present_text = ",".join(
        '%s (%s on record %s)' % (signature, name, signature[2])
        for signature, name in present)

    conflict = (
        "want to move %s (%s on record %s) to this profile but [%s] are already present and claimed"
        % (self.signature, moved_name, self.signature[2], present_text))
    record_url = "%s/record/%s" % (CFG_SITE_URL, self.signature[2])

    lines = [
        'Found wrong signature claimed to profile ',
        profile_url,
        conflict,
        record_url,
    ]
    return '\n'.join(lines)
def process_authors(pids):
    """Populate ``bibEDITAFFILIATIONS_tmp`` for a group of authors.

    For each paper association of the authors in ``pids`` that is not
    rejected (``flag > -2``), the name of the bibref and the affiliations
    of the author are resolved. Associations whose author has affiliations
    are written to the table.

    :param pids: ids of the authors (persons) to process
    :return: number of values passed to ``populate_table`` -- six for
        every stored association -- or 0 when nothing was stored
    """
    author_ids = list(pids)
    if not author_ids:
        # Without ids the query would read ``personid in (  )`` and fail
        # in the database; nothing can be stored in that case.
        return 0

    # get the author - paper associations (excluding rejected papers);
    # one bound string parameter per id instead of quoting them by hand
    query = ("select personid, bibref_table, bibref_value, bibrec "
             "from aidPERSONIDPAPERS "
             "where flag > -2 and personid in ( %s )"
             % ' , '.join('%s' for _ in author_ids))
    associations = set(run_sql(query, tuple(str(p) for p in author_ids)))

    # get mapping of: (bibref_table, bibref_value) -> name
    bibref_to_name = {}
    for bibref in set((table, ref) for _, table, ref, _ in associations):
        name = get_name_by_bibref(bibref)
        bibref_to_name[bibref] = name
        write_message('Got name: %s for bibref: %s.' % (name, str(bibref)))

    # get mapping of: author -> affiliations
    pid_to_affiliations = {}
    for pid in author_ids:
        found = get_affiliations_for_author(pid)
        pid_to_affiliations[pid] = found
        write_message('Got affiliations: %s for author: %s.'
                      % (found, str(pid)))

    # Values are collected in one flat list, six per stored association
    # (rec, name, affiliations, table, ref, year).
    # NOTE(review): populate_table presumably splits the list by the number
    # of columns -- confirm against its implementation.
    affiliated_records = []
    for pid, table, ref, rec in associations:
        affiliations, year = pid_to_affiliations[pid]
        if affiliations:
            affiliated_records += [rec, bibref_to_name[(table, ref)],
                                   serialize(affiliations), table, ref, year]
        # associations without affiliation/s are skipped

    # flush data to db
    if affiliated_records:
        # NOTE(review): the reported number is the value count, i.e. six
        # times the number of associations.
        write_message('Populating table with %s records.'
                      % len(affiliated_records))
        populate_table('bibEDITAFFILIATIONS_tmp',
                       ['bibrec', 'name', 'affiliations', 'bibref_table',
                        'bibref_value', 'year'],
                       affiliated_records, empty_table_first=False)

    return len(affiliated_records)