def get_kbd_values(kbname, searchwith=""):
    """Return a list of values by searching a dynamic kb.

    @param kbname: name of the knowledge base
    @param searchwith: a term to search with
    @return: list of the values of the configured field found in the
        matching records; an empty list when the knowledge base is unknown,
        is not of type 'd', has no 'field' in its configuration, or when the
        search matches no record
    """
    import search_engine

    # first check that the kb in question is dynamic
    kbid = bibknowledge_dblayer.get_kb_id(kbname)
    if not kbid:
        return []
    # a missing/empty type is rejected by the same comparison
    if bibknowledge_dblayer.get_kb_type(kbid) != 'd':
        return []

    # get the configuration so that we see what the field is
    confdict = bibknowledge_dblayer.get_kb_dyn_config(kbid)
    if not confdict or 'field' not in confdict:
        return []
    field = confdict['field']
    # An absent 'expression' is treated like an empty one instead of raising
    # KeyError: the field-only search below exists exactly for that case.
    expression = confdict.get('expression', '')
    collection = confdict.get('collection', '')

    if expression:
        if searchwith:
            if '%' in expression:
                # '%' is the placeholder for the search term
                expression = expression.replace("%", searchwith)
            else:
                # no %.. just make a combination
                expression = expression + " and " + searchwith
        reclist = search_engine.perform_request_search(p=expression,
                                                       cc=collection)
    else:
        # no expression: use the search term itself, or a match-everything
        # pattern so that only records that have this field are returned
        reclist = search_engine.perform_request_search(
            f=field, p=searchwith or "/.*/", cc=collection)

    if not reclist:
        return []  # in case nothing worked

    fieldvaluelist = search_engine.get_most_popular_field_values(reclist,
                                                                 field)
    # each entry is a (value, something) pair; only the value is returned
    return [val for (val, dummy) in fieldvaluelist]
def get_kbd_values(kbname, searchwith=""):
    """
    To be used by bibedit. Returns a list of values based on a dynamic kb.

    @param kbname: name of the knowledge base
    @param searchwith: a term to search with; it is wrapped in single quotes
        before being used, and a trailing '*' requests "starts with" matching
    @return: list of the values of the configured field found in the
        matching records; an empty list when the knowledge base is unknown,
        is not of type 'd', has no 'field' in its configuration, or when the
        search matches no record
    """
    import search_engine

    # first check that the kb in question is dynamic
    kbid = bibknowledge_dblayer.get_kb_id(kbname)
    if not kbid:
        return []
    # a missing/empty type is rejected by the same comparison
    if bibknowledge_dblayer.get_kb_type(kbid) != 'd':
        return []

    # get the configuration so that we see what the field is
    confdict = bibknowledge_dblayer.get_kb_dyn_config(kbid)
    if not confdict or 'field' not in confdict:
        return []
    field = confdict['field']
    # An absent 'expression' is treated like an empty one instead of raising
    # KeyError: the field-only search below exists exactly for that case.
    expression = confdict.get('expression', '')
    collection = confdict.get('collection', '')

    # make sure searchwith is a quoted expression
    if searchwith:
        if not searchwith.startswith("'"):
            searchwith = "'" + searchwith
        if not searchwith.endswith("'"):
            searchwith = searchwith + "'"

    if expression:
        if searchwith:
            if '%' in expression or expression.endswith(":*"):
                # '%' and ':*' are the placeholders for the search term
                expression = expression.replace("%", searchwith)
                expression = expression.replace(":*", ':' + searchwith)
            else:
                # no placeholder.. just make a combination; the leading
                # space keeps the operator from being glued to the expression
                expression = expression + " and " + searchwith
        reclist = search_engine.perform_request_search(p=expression,
                                                       cc=collection)
    else:
        # no expression: use the search term itself, or a match-everything
        # pattern so that only records that have this field are returned
        reclist = search_engine.perform_request_search(
            f=field, p=searchwith or "/.*/", cc=collection)

    if not reclist:
        return []  # in case nothing worked

    fieldvaluelist = search_engine.get_most_popular_field_values(reclist,
                                                                 field)
    values = [val for (val, dummy) in fieldvaluelist]

    # support "starts with", indicated by the * at the end of the search
    # string: searchwith looks like 'prefix*' here, so the prefix is what
    # lies between the opening quote and the trailing "*'"
    if searchwith and len(searchwith) > 2 and searchwith[-2] == '*':
        prefix = searchwith[1:-2]
        values = [val for val in values if val.startswith(prefix)]
    return values
def personid_fast_assign_papers(paperslist=None, use_threading_not_multiprocessing=True):
    '''
    Assign papers to the most compatible person.
    Compares only the name to find the right person to assign to. If nobody seems compatible,
    create a new person.

    @param paperslist: sequence of rows whose element 0 is the record id.
        When empty or None, every record returned by
        perform_request_search(p="") is processed.
    @param use_threading_not_multiprocessing: when True the queue is drained
        by Worker threads, otherwise by multiprocessing.Process instances.
    '''

    class Worker(Thread):
        '''Thread that processes queued papers until the queue is empty or a stop is requested.'''

        def __init__(self, i, p_q, atul, personid_new_id_lock, checker):
            Thread.__init__(self)
            self.i = i
            self.checker = checker
            self.p_q = p_q
            self.atul = atul
            self.personid_new_id_lock = personid_new_id_lock

        def run(self):
            while True:
                # use the checker handed to this worker rather than whatever
                # the enclosing function's variable happens to hold
                if self.checker.should_stop():
                    break
                try:
                    bibrec = self.p_q.get_nowait()
                except Empty:
                    break
                close_connection()

                pfap_assign_paper_iteration(self.i, bibrec, self.atul,
                                            self.personid_new_id_lock)

    def _pfap_assign_paper(i, p_q, atul, personid_new_id_lock, checker):
        # process-based counterpart of Worker.run
        while True:
            # check bibsched
            if checker.should_stop():
                break
            try:
                bibrec = p_q.get_nowait()
            except Empty:
                break

            pfap_assign_paper_iteration(i, bibrec, atul, personid_new_id_lock)

    _pfap_printmsg('starter', 'Started')
    if not paperslist:
        # wrap the ids so that they look like one-column rows
        paperslist = [[x] for x in perform_request_search(p="")]

    paperslist = [k[0] for k in paperslist]

    _pfap_printmsg('starter', 'Starting on %s papers ' % len(paperslist))

    if use_threading_not_multiprocessing:
        authornames_table_update_lock = Lock()
        personid_new_id_lock = Lock()
        papers_q = Queue()
    else:
        authornames_table_update_lock = multiprocessing.Lock()
        personid_new_id_lock = multiprocessing.Lock()
        papers_q = multiprocessing.Queue()

    for p in paperslist:
        papers_q.put(p)

    if not use_threading_not_multiprocessing:
        process_list = []
        c = 0
        while not papers_q.empty():
            checker = status_checker()
            # strictly below the limit, so that at most
            # CFG_BIBAUTHORID_MAX_PROCESSES run at once (same rule as the
            # thread branch below)
            while len(process_list) < bconfig.CFG_BIBAUTHORID_MAX_PROCESSES:
                p = multiprocessing.Process(
                    target=_pfap_assign_paper,
                    args=(c, papers_q, authornames_table_update_lock,
                          personid_new_id_lock, checker))
                c += 1
                process_list.append(p)
                p.start()

            # reap finished processes; iterate over a copy because the list
            # is modified inside the loop
            for p in tuple(process_list):
                if not p.is_alive():
                    p.join()
                    process_list.remove(p)

            task_sleep_now_if_required(True)
    else:
        max_processes = bconfig.CFG_BIBAUTHORID_PERSONID_SQL_MAX_THREADS
        checker = status_checker()
        workers = []
        while not papers_q.empty():
            i = 0
            while len(workers) < max_processes:
                w = Worker(i, papers_q, authornames_table_update_lock,
                           personid_new_id_lock, checker)
                i += 1
                w.start()
                workers.append(w)

            # reap finished threads; iterate over a copy because the list
            # is modified inside the loop
            for w in tuple(workers):
                if not w.is_alive():
                    w.join()
                    workers.remove(w)

            task_sleep_now_if_required(True)
def get_kbd_values(kbname, searchwith=""):
    """
    To be used by bibedit. Returns a list of values based on a dynamic kb.

    @param kbname: name of the knowledge base
    @param searchwith: a term to search with; it is wrapped in single quotes
        before being used, and a trailing '*' requests "starts with" matching
    @return: list of the values of the configured field found in the
        matching records; an empty list when the knowledge base is unknown,
        is not of type 'd', has no 'field' in its configuration, or when the
        search matches no record
    """
    import search_engine

    # first check that the kb in question is dynamic
    kbid = bibknowledge_dblayer.get_kb_id(kbname)
    if not kbid:
        return []
    # a missing/empty type is rejected by the same comparison
    if bibknowledge_dblayer.get_kb_type(kbid) != 'd':
        return []

    # get the configuration so that we see what the field is
    confdict = bibknowledge_dblayer.get_kb_dyn_config(kbid)
    if not confdict or 'field' not in confdict:
        return []
    field = confdict['field']
    # An absent 'expression' is treated like an empty one instead of raising
    # KeyError: the field-only search below exists exactly for that case.
    expression = confdict.get('expression', '')
    collection = confdict.get('collection', '')

    # make sure searchwith is a quoted expression
    if searchwith:
        if not searchwith.startswith("'"):
            searchwith = "'" + searchwith
        if not searchwith.endswith("'"):
            searchwith = searchwith + "'"

    if expression:
        if searchwith:
            if '%' in expression or expression.endswith(":*"):
                # '%' and ':*' are the placeholders for the search term
                expression = expression.replace("%", searchwith)
                expression = expression.replace(":*", ':' + searchwith)
            else:
                # no placeholder.. just make a combination; the leading
                # space keeps the operator from being glued to the expression
                expression = expression + " and " + searchwith
        reclist = search_engine.perform_request_search(p=expression,
                                                       cc=collection)
    else:
        # no expression: use the search term itself, or a match-everything
        # pattern so that only records that have this field are returned
        reclist = search_engine.perform_request_search(
            f=field, p=searchwith or "/.*/", cc=collection)

    if not reclist:
        return []  # in case nothing worked

    fieldvaluelist = search_engine.get_most_popular_field_values(
        reclist, field)
    values = [val for (val, dummy) in fieldvaluelist]

    # support "starts with", indicated by the * at the end of the search
    # string: searchwith looks like 'prefix*' here, so the prefix is what
    # lies between the opening quote and the trailing "*'"
    if searchwith and len(searchwith) > 2 and searchwith[-2] == '*':
        prefix = searchwith[1:-2]
        values = [val for val in values if val.startswith(prefix)]
    return values
def arxiv_login(req, picked_profile=None):
    '''
    Log in through arXiv. If the user is already associated to a personid, that personid is used.
    If the user has no pid and no profile was picked, a list of candidate
    profiles is returned so that one can be chosen; if a profile was picked,
    its availability is checked through dbapi.check_personids_availability.
    At the end of the process opens a ticket for the user claiming the papers from arXiv.
    !!! the user will find the open ticket, which will require him to go through the
    final review before getting committed.

    @param req: Apache request object
    @type req: Apache request object
    @param picked_profile: pid of the profile chosen by the user, or None
        when no choice has been made yet (-1 is handled like "no choice" when
        the result status is computed)

    @return: a (status, value) pair, one of
        ("top5_list", <result of dbapi.find_top5_personid_for_new_arXiv_user>),
        ("chosen pid not available", pid),
        ("pid assigned by user", pid),
        ("pid", pid)
    @rtype: tuple
    '''
    def session_bareinit(req):
        # make sure session['personinfo']['ticket'] exists
        session = get_session(req)
        try:
            pinfo = session["personinfo"]
            if 'ticket' not in pinfo:
                pinfo["ticket"] = []
        except KeyError:
            pinfo = dict()
            session['personinfo'] = pinfo
            pinfo["ticket"] = []
        session.dirty = True

    session_bareinit(req)
    session = get_session(req)

    pinfo = session['personinfo']
    ticket = session['personinfo']['ticket']

    uinfo = collect_user_info(req)
    pinfo['external_first_entry'] = False

    try:
        name = uinfo['external_firstname']
    except KeyError:
        name = ''
    try:
        surname = uinfo['external_familyname']
    except KeyError:
        surname = ''

    if surname:
        session['personinfo']['arxiv_name'] = nameapi.create_normalized_name(
            nameapi.split_name_parts(surname + ', ' + name))
    else:
        session['personinfo']['arxiv_name'] = ''

    session.dirty = True

    try:
        arxiv_p_ids = uinfo['external_arxivids'].split(';')
    except KeyError:
        arxiv_p_ids = []

    #'external_arxivids': 'hep-th/0112017;hep-th/0112020',
    #'external_familyname': 'Weiler',
    #'external_firstname': 'Henning',

    # Union of the records found for every arXiv id. Built incrementally so
    # that "no ids" is simply an empty set, without relying on an exception.
    found_bibrecs = set()
    for arx in arxiv_p_ids:
        found_bibrecs.update(
            perform_request_search(p='037:' + str(arx), of='id', rg=0))

    uid = getUid(req)
    pid, pid_found = dbapi.get_personid_from_uid([[uid]])

    if pid_found:
        pid = pid[0]
    elif picked_profile is None:
        # nothing chosen yet: hand back the candidates and stop here
        top5_list = dbapi.find_top5_personid_for_new_arXiv_user(
            found_bibrecs,
            nameapi.create_normalized_name(
                nameapi.split_name_parts(surname + ', ' + name)))
        return ("top5_list", top5_list)
    else:
        pid = dbapi.check_personids_availability(picked_profile, uid)

    pid_bibrecs = set([i[0] for i in dbapi.get_all_personids_recs(pid, claimed_only=True)])
    missing_bibrecs = found_bibrecs - pid_bibrecs

    # now we have to open the tickets, one per arXiv paper that is not yet
    # claimed by this person
    tempticket = []
    for bibrec in missing_bibrecs:
        tempticket.append({'pid': pid, 'bibref': str(bibrec), 'action': 'confirm'})

    # if a ticket target (bibref for pid) is already in the ticket, the old
    # entry is replaced by the new one
    for t in list(tempticket):
        for e in list(ticket):
            if e['pid'] == t['pid'] and e['bibref'] == t['bibref']:
                ticket.remove(e)
        ticket.append(t)

    session.dirty = True

    if picked_profile is not None and picked_profile != -1:
        if picked_profile != pid:
            return ("chosen pid not available", pid)
        return ("pid assigned by user", pid)
    return ("pid", pid)
def arxiv_login(req):
    '''
    Log in through arXiv. If the user is already associated to a personid, returns the personid.
    If the user has no pid, a personid is obtained from
    dbapi.reclaim_personid_for_new_arXiv_user, based on the normalized name
    and the papers from arXiv.
    At the end of the process opens a ticket for the user claiming the papers from arXiv.
    !!! the user will find the open ticket, which will require him to go through the
    final review before getting committed.

    @param req: Apache request object
    @type req: Apache request object

    @return: Returns the pid resulting in the process
    @rtype: int
    '''
    def session_bareinit(req):
        # make sure session['personinfo']['ticket'] exists
        session = get_session(req)
        try:
            pinfo = session["personinfo"]
            if 'ticket' not in pinfo:
                pinfo["ticket"] = []
        except KeyError:
            pinfo = dict()
            session['personinfo'] = pinfo
            pinfo["ticket"] = []
        session.save()

    session_bareinit(req)
    session = get_session(req)

    pinfo = session['personinfo']
    ticket = session['personinfo']['ticket']

    uinfo = collect_user_info(req)
    pinfo['external_first_entry'] = False

    # Missing name parts default to '' (not None) so that the string
    # concatenations below cannot raise TypeError.
    try:
        name = uinfo['external_firstname']
    except KeyError:
        name = ''
    try:
        surname = uinfo['external_familyname']
    except KeyError:
        surname = ''

    if surname:
        session['personinfo']['arxiv_name'] = nameapi.create_normalized_name(
            nameapi.split_name_parts(surname + ', ' + name))
    else:
        session['personinfo']['arxiv_name'] = ''
    session.save()

    try:
        arxiv_p_ids = uinfo['external_arxivids'].split(';')
    except KeyError:
        arxiv_p_ids = []

    #'external_arxivids': 'hep-th/0112017;hep-th/0112020',
    #'external_familyname': 'Weiler',
    #'external_firstname': 'Henning',

    # Union of ALL records found for every arXiv id. (Transposing the result
    # lists with zip and taking row 0 kept only the first hit per id and
    # produced nothing at all as soon as one id had no hit.)
    found_bibrecs = set()
    for arx in arxiv_p_ids:
        found_bibrecs.update(
            perform_request_search(p='037:' + str(arx), of='id', rg=0))

    uid = getUid(req)
    pid, pid_found = dbapi.get_personid_from_uid([[uid]])

    if pid_found:
        pid = pid[0]
    else:
        pid = dbapi.reclaim_personid_for_new_arXiv_user(
            found_bibrecs,
            nameapi.create_normalized_name(
                nameapi.split_name_parts(surname + ', ' + name)),
            uid)

    pid_bibrecs = set([i[0] for i in dbapi.get_all_personids_recs(pid, claimed_only=True)])
    missing_bibrecs = found_bibrecs - pid_bibrecs

    # now we have to open the tickets, one per arXiv paper that is not yet
    # claimed by this person
    tempticket = []
    for bibrec in missing_bibrecs:
        tempticket.append({'pid': pid, 'bibref': str(bibrec), 'action': 'confirm'})

    # if a ticket target (bibref for pid) is already in the ticket, the old
    # entry is replaced by the new one
    for t in list(tempticket):
        for e in list(ticket):
            if e['pid'] == t['pid'] and e['bibref'] == t['bibref']:
                ticket.remove(e)
        ticket.append(t)

    session.save()
    return pid
def personid_fast_assign_papers(paperslist=None, use_threading_not_multiprocessing=True):
    '''
    Assign papers to the most compatible person.
    Compares only the name to find the right person to assign to. If nobody seems compatible,
    create a new person.

    @param paperslist: sequence of rows whose element 0 is the record id.
        When empty or None, every record returned by
        perform_request_search(p="") is processed.
    @param use_threading_not_multiprocessing: when True the queue is drained
        by Worker threads, otherwise by multiprocessing.Process instances.
    '''

    class Worker(Thread):
        '''Thread that processes queued papers until the queue is empty or a stop is requested.'''

        def __init__(self, i, p_q, atul, personid_new_id_lock, checker):
            Thread.__init__(self)
            self.i = i
            self.checker = checker
            self.p_q = p_q
            self.atul = atul
            self.personid_new_id_lock = personid_new_id_lock

        def run(self):
            while True:
                # use the checker handed to this worker rather than whatever
                # the enclosing function's variable happens to hold
                if self.checker.should_stop():
                    break
                try:
                    bibrec = self.p_q.get_nowait()
                except Empty:
                    break
                close_connection()

                pfap_assign_paper_iteration(self.i, bibrec, self.atul,
                                            self.personid_new_id_lock)

    def _pfap_assign_paper(i, p_q, atul, personid_new_id_lock, checker):
        # process-based counterpart of Worker.run
        while True:
            # check bibsched
            if checker.should_stop():
                break
            try:
                bibrec = p_q.get_nowait()
            except Empty:
                break

            pfap_assign_paper_iteration(i, bibrec, atul, personid_new_id_lock)

    _pfap_printmsg('starter', 'Started')
    if not paperslist:
        # wrap the ids so that they look like one-column rows
        paperslist = [[x] for x in perform_request_search(p="")]

    paperslist = [k[0] for k in paperslist]

    _pfap_printmsg('starter', 'Starting on %s papers ' % len(paperslist))

    if use_threading_not_multiprocessing:
        authornames_table_update_lock = Lock()
        personid_new_id_lock = Lock()
        papers_q = Queue()
    else:
        authornames_table_update_lock = multiprocessing.Lock()
        personid_new_id_lock = multiprocessing.Lock()
        papers_q = multiprocessing.Queue()

    for p in paperslist:
        papers_q.put(p)

    if not use_threading_not_multiprocessing:
        process_list = []
        c = 0
        while not papers_q.empty():
            checker = status_checker()
            # strictly below the limit, so that at most
            # CFG_BIBAUTHORID_MAX_PROCESSES run at once (same rule as the
            # thread branch below)
            while len(process_list) < bconfig.CFG_BIBAUTHORID_MAX_PROCESSES:
                p = multiprocessing.Process(
                    target=_pfap_assign_paper,
                    args=(c, papers_q, authornames_table_update_lock,
                          personid_new_id_lock, checker))
                c += 1
                process_list.append(p)
                p.start()

            # reap finished processes; iterate over a copy because the list
            # is modified inside the loop
            for p in tuple(process_list):
                if not p.is_alive():
                    p.join()
                    process_list.remove(p)

            task_sleep_now_if_required(can_stop_too=False)
    else:
        max_processes = bconfig.CFG_BIBAUTHORID_PERSONID_SQL_MAX_THREADS
        checker = status_checker()
        workers = []
        while not papers_q.empty():
            i = 0
            while len(workers) < max_processes:
                w = Worker(i, papers_q, authornames_table_update_lock,
                           personid_new_id_lock, checker)
                i += 1
                w.start()
                workers.append(w)

            # reap finished threads; iterate over a copy because the list
            # is modified inside the loop
            for w in tuple(workers):
                if not w.is_alive():
                    w.join()
                    workers.remove(w)

            task_sleep_now_if_required(can_stop_too=False)
def get_all_valid_bibrecs():
    '''
    Search for the records whose 980__a field matches any of the names in
    bconfig.LIMIT_TO_COLLECTIONS.

    @return: whatever perform_request_search returns for the "or"-joined
        pattern, with rg=0
    '''
    clauses = []
    for collection_name in bconfig.LIMIT_TO_COLLECTIONS:
        clauses.append("980__a:\"%s\"" % collection_name)
    pattern = " or ".join(clauses)
    return perform_request_search(p=pattern, rg=0)