def _pick_unambiguous_person(possible_persons):
    '''
    Choose the candidate person that matches strictly more papers than
    every other candidate.

    @param possible_persons: candidates as [personid, [bibrefrec, ...]] pairs
    @type possible_persons: list

    @return: (personid, papers) of the single best candidate, or (-1, [])
        when there is no candidate or the two best candidates tie
    @rtype: tuple
    '''
    # Best match first: the comparison of the first two entries below only
    # makes sense when the list is ranked in descending order. (An ascending
    # sort would make the "first beats second" test impossible to satisfy.)
    ranked = sorted(possible_persons, key=lambda k: len(k[1]), reverse=True)
    if not ranked:
        return -1, []
    if len(ranked) > 1 and len(ranked[0][1]) <= len(ranked[1][1]):
        # Tie between the two best candidates: do not guess.
        return -1, []
    return ranked[0][0], ranked[0][1]


def arxiv_login(req):
    '''
    Log in through arXiv.

    If the user is already associated to a personid, returns that personid.
    If the user has no pid, try to guess which personid to associate, based
    on the surname and the papers coming from arXiv. If no compatible person
    is found (or the best match is ambiguous), -1 is passed to
    assign_person_to_uid so that a new person is created.
    At the end of the process a ticket claiming the arXiv papers is opened
    for the user in the session.
    !!! The user will find the open ticket, which will require him to go
    through the final review before it gets committed.

    @param req: Apache request object
    @type req: Apache request object

    @return: Returns the pid resulting from the process
    @rtype: int
    '''
    def session_bareinit(req):
        # Make sure session['personinfo']['ticket'] exists before it is used.
        session = get_session(req)
        try:
            pinfo = session["personinfo"]
            if 'ticket' not in pinfo:
                pinfo["ticket"] = []
        except KeyError:
            pinfo = dict()
            session['personinfo'] = pinfo
            pinfo["ticket"] = []
        session.save()

    session_bareinit(req)
    session = get_session(req)
    ticket = session['personinfo']['ticket']

    uid = getUid(req)
    # A truthy second element means the first one holds the pid already
    # associated to this uid. NOTE(review): confirm the exact return shape
    # against tu.get_personid_from_uid.
    current_pid = tu.get_personid_from_uid([[uid]])
    if current_pid[1]:
        return current_pid[0][0]

    uinfo = collect_user_info(req)
    # Example of the external fields read here:
    #   'external_arxivids': 'hep-th/0112017;hep-th/0112020',
    #   'external_familyname': 'Weiler',
    # The two fields are read independently, so that a missing one does not
    # discard the other. Empty id fragments are skipped.
    arxiv_p_ids = [arx for arx in uinfo.get('external_arxivids', '').split(';')
                   if arx]
    surname = uinfo.get('external_familyname', '')

    found_bibrecs = []
    for arx in arxiv_p_ids:
        found_bibrecs.extend(
            search_engine.perform_request_search(p='037:' + str(arx), of='id'))
    # e.g. found_bibrecs = [78]

    # For every record keep only the author names compatible with the user's
    # surname and turn them into "bibref,bibrec" strings.
    bibrefrecs = []
    for bibrec in found_bibrecs:
        author_names = bu.get_field_values_on_condition(
            bibrec, source='API', get_table=['100', '700'], get_tag='a')
        # e.g. author_names = set([u'M\xfcck, W'])
        for author_name in author_names:
            encoded_name = author_name.encode('utf-8')
            if bau.soft_compare_names(surname, encoded_name) < 0.4:
                continue
            bibrefs = tu0.get_bibrefs_from_name_string(encoded_name)
            if not bibrefs:
                continue
            for bibref in bibrefs[0][0].split(','):
                bibrefrecs.append(str(bibref) + ',' + str(bibrec))
    # e.g. bibrefrecs = ['100:116,78', '700:505,78']

    brr = [[bibrefrec] for bibrefrec in bibrefrecs]
    possible_persons = tu.get_possible_personids_from_paperlist(brr)
    # e.g. [[0L, ['700:316,10']]]

    best_pid, person_papers = _pick_unambiguous_person(possible_persons)
    pid = tu.assign_person_to_uid(uid, best_pid)

    # Now we have to open the tickets: first the papers already matched to
    # the chosen person, then the remaining arXiv records (by record id only).
    tempticket = [{'pid': pid, 'bibref': bibref, 'action': 'confirm'}
                  for bibref in person_papers]

    done_bibrecs = set([paper.split(',')[1] for paper in person_papers])
    for bibrec in found_bibrecs:
        if str(bibrec) not in done_bibrecs:
            tempticket.append(
                {'pid': pid, 'bibref': str(bibrec), 'action': 'confirm'})

    # If a ticket target (bibref for pid) is already in the ticket, replace
    # the old entry with the new one.
    for new_entry in tempticket:
        for old_entry in list(ticket):
            if (old_entry['pid'] == new_entry['pid']
                    and old_entry['bibref'] == new_entry['bibref']):
                ticket.remove(old_entry)
        ticket.append(new_entry)

    session.save()
    return pid
from invenio.bibauthorid_utils import get_field_values_on_condition d = run_sql("Select * from aidPERSONID where personid = 417716") refs = {} recs = {} for i in d: if i[2] == 'paper': ref,rec = i[3].split(',') if ref in refs: refs[ref] += 1 else: refs[ref] = 1 if rec in recs: recs[rec] += 1 else: recs[rec] = 1 x = [[i, recs[i]] for i in recs if recs[i] > 1] for f in x: print "record: %s" % f[0] print " 100a: %s (700i: %s)" % (get_fieldvalues(int(f[0]), "100__a"), list(get_field_values_on_condition(int(f[0]), [100,700], "i", "a", get_fieldvalues(int(f[0]), "100__a")[0], source="API"))[0]) for n in [i for i in get_fieldvalues(int(f[0]), "700__a") if i.lower().startswith("gru") and "wald" not in i.lower()]: print " 700a:", n, "(700i: %s)" % list(get_field_values_on_condition(int(f[0]), [100,700], "i", "a", n, source="API"))[0] print " 710g:", get_fieldvalues(int(f[0]), "710__g") print " "